<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
    <channel>
        <title>Voice Cloning on Producthunt daily</title>
        <link>https://producthunt.programnotes.cn/en/tags/voice-cloning/</link>
        <description>Recent content in Voice Cloning on Producthunt daily</description>
        <generator>Hugo -- gohugo.io</generator>
        <language>en</language>
        <lastBuildDate>Sat, 28 Mar 2026 07:51:53 +0000</lastBuildDate><atom:link href="https://producthunt.programnotes.cn/en/tags/voice-cloning/index.xml" rel="self" type="application/rss+xml" /><item>
        <title>Product Hunt Daily | 2026-03-28</title>
        <link>https://producthunt.programnotes.cn/en/p/product-hunt-daily-2026-03-28/</link>
        <pubDate>Sat, 28 Mar 2026 07:51:53 +0000</pubDate>
        
        <guid>https://producthunt.programnotes.cn/en/p/product-hunt-daily-2026-03-28/</guid>
        <description>&lt;img src="https://ph-files.imgix.net/b6ae0345-5f2a-4bba-82ab-0ba4e9090f9f.png?auto=format" alt="Featured image of post Product Hunt Daily | 2026-03-28" /&gt;&lt;h2 id=&#34;1-agentation&#34;&gt;1. Agentation
&lt;/h2&gt;&lt;p&gt;&lt;strong&gt;Tagline&lt;/strong&gt;: The visual feedback tool for AI agents&lt;br&gt;
&lt;strong&gt;Description&lt;/strong&gt;: Agentation turns UI annotations into structured context that AI coding agents can understand and act on. Click any element, add a note, and paste the output into Claude Code, Codex, or any AI tool.&lt;br&gt;
&lt;strong&gt;Website&lt;/strong&gt;: &lt;a class=&#34;link&#34; href=&#34;https://www.producthunt.com/r/UBMKYT2Z76KGDI?utm_campaign=producthunt-api&amp;amp;utm_medium=api-v2&amp;amp;utm_source=Application%3A&amp;#43;weekly&amp;#43;%28ID%3A&amp;#43;148189%29&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;open&lt;/a&gt;&lt;br&gt;
&lt;strong&gt;Product Hunt&lt;/strong&gt;: &lt;a class=&#34;link&#34; href=&#34;https://www.producthunt.com/products/agentation?utm_campaign=producthunt-api&amp;amp;utm_medium=api-v2&amp;amp;utm_source=Application%3A&amp;#43;weekly&amp;#43;%28ID%3A&amp;#43;148189%29&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;View on Product Hunt&lt;/a&gt;&lt;br&gt;
&lt;img src=&#34;https://ph-files.imgix.net/b6ae0345-5f2a-4bba-82ab-0ba4e9090f9f.png?auto=format&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;Agentation&#34;
	
	
&gt;&lt;br&gt;
&lt;strong&gt;Keyword&lt;/strong&gt;: AI agents, visual feedback, UI annotations, structured context, coding agents, developer tools&lt;br&gt;
&lt;strong&gt;VotesCount&lt;/strong&gt;: 🔺397&lt;br&gt;
&lt;strong&gt;Featured&lt;/strong&gt;: Yes&lt;br&gt;
&lt;strong&gt;CreatedAt&lt;/strong&gt;: 2026-03-27 07:01 AM (UTC)&lt;/p&gt;
&lt;hr&gt;
&lt;h2 id=&#34;2-claude-code-auto-fix&#34;&gt;2. Claude Code auto-fix
&lt;/h2&gt;&lt;p&gt;&lt;strong&gt;Tagline&lt;/strong&gt;: Auto-fix PRs in the cloud while you stay hands-off&lt;br&gt;
&lt;strong&gt;Description&lt;/strong&gt;: Claude Code auto-fix watches your pull requests in the cloud, resolving CI failures and review comments automatically. It pushes fixes, asks when needed, and keeps your PR green, so you can step away and come back to a ready-to-merge result.&lt;br&gt;
&lt;strong&gt;Website&lt;/strong&gt;: &lt;a class=&#34;link&#34; href=&#34;https://www.producthunt.com/r/ZD7UARQER7Q65Z?utm_campaign=producthunt-api&amp;amp;utm_medium=api-v2&amp;amp;utm_source=Application%3A&amp;#43;weekly&amp;#43;%28ID%3A&amp;#43;148189%29&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;open&lt;/a&gt;&lt;br&gt;
&lt;strong&gt;Product Hunt&lt;/strong&gt;: &lt;a class=&#34;link&#34; href=&#34;https://www.producthunt.com/products/claude-code-auto-fix-in-the-cloud?utm_campaign=producthunt-api&amp;amp;utm_medium=api-v2&amp;amp;utm_source=Application%3A&amp;#43;weekly&amp;#43;%28ID%3A&amp;#43;148189%29&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;View on Product Hunt&lt;/a&gt;&lt;br&gt;
&lt;img src=&#34;https://ph-files.imgix.net/e865c0c8-2b45-4c23-af78-29e3b46cfd74.png?auto=format&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;Claude Code auto-fix&#34;
	
	
&gt;&lt;br&gt;
&lt;strong&gt;Keyword&lt;/strong&gt;: AI code fix, automated PR review, CI failure resolution, cloud-based development, hands-off coding, auto-fix pull requests, continuous integration automation, Claude AI, merge-ready PRs, developer productivity&lt;br&gt;
&lt;strong&gt;VotesCount&lt;/strong&gt;: 🔺337&lt;br&gt;
&lt;strong&gt;Featured&lt;/strong&gt;: Yes&lt;br&gt;
&lt;strong&gt;CreatedAt&lt;/strong&gt;: 2026-03-27 07:01 AM (UTC)&lt;/p&gt;
&lt;hr&gt;
&lt;h2 id=&#34;3-gemini-31-flash-live&#34;&gt;3. Gemini 3.1 Flash Live
&lt;/h2&gt;&lt;p&gt;&lt;strong&gt;Tagline&lt;/strong&gt;: Making audio AI more natural and reliable&lt;br&gt;
&lt;strong&gt;Description&lt;/strong&gt;: Gemini 3.1 Flash Live is Google’s new state-of-the-art native audio model. Built for low-latency, real-time dialogue, it excels at complex reasoning and function calling. It is the exact engine currently powering Gemini Live and Google Search Live.&lt;br&gt;
&lt;strong&gt;Website&lt;/strong&gt;: &lt;a class=&#34;link&#34; href=&#34;https://www.producthunt.com/r/5EGR7NSXB6V7QK?utm_campaign=producthunt-api&amp;amp;utm_medium=api-v2&amp;amp;utm_source=Application%3A&amp;#43;weekly&amp;#43;%28ID%3A&amp;#43;148189%29&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;open&lt;/a&gt;&lt;br&gt;
&lt;strong&gt;Product Hunt&lt;/strong&gt;: &lt;a class=&#34;link&#34; href=&#34;https://www.producthunt.com/products/gemini-6?utm_campaign=producthunt-api&amp;amp;utm_medium=api-v2&amp;amp;utm_source=Application%3A&amp;#43;weekly&amp;#43;%28ID%3A&amp;#43;148189%29&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;View on Product Hunt&lt;/a&gt;&lt;br&gt;
&lt;img src=&#34;https://ph-files.imgix.net/78f8861e-e77a-4429-a85e-b95ba9892dd1.png?auto=format&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;Gemini 3.1 Flash Live&#34;
	
	
&gt;&lt;br&gt;
&lt;strong&gt;Keyword&lt;/strong&gt;: Gemini, audio AI, real-time dialogue, low-latency, natural conversation, reliable, Google AI, function calling, complex reasoning, Gemini Live, Google Search Live, native audio model&lt;br&gt;
&lt;strong&gt;VotesCount&lt;/strong&gt;: 🔺317&lt;br&gt;
&lt;strong&gt;Featured&lt;/strong&gt;: Yes&lt;br&gt;
&lt;strong&gt;CreatedAt&lt;/strong&gt;: 2026-03-27 07:01 AM (UTC)&lt;/p&gt;
&lt;hr&gt;
&lt;h2 id=&#34;4-insideorg&#34;&gt;4. InsideOrg
&lt;/h2&gt;&lt;p&gt;&lt;strong&gt;Tagline&lt;/strong&gt;: Free organization chart viewer for any company&lt;br&gt;
&lt;strong&gt;Description&lt;/strong&gt;: InsideOrg lets you enter any company domain and instantly see decision makers, reporting lines, and org structure for free. You don’t have to pay just to view a company’s org chart.&lt;br&gt;
&lt;strong&gt;Website&lt;/strong&gt;: &lt;a class=&#34;link&#34; href=&#34;https://www.producthunt.com/r/SZCX7EGZ2ZDGUA?utm_campaign=producthunt-api&amp;amp;utm_medium=api-v2&amp;amp;utm_source=Application%3A&amp;#43;weekly&amp;#43;%28ID%3A&amp;#43;148189%29&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;open&lt;/a&gt;&lt;br&gt;
&lt;strong&gt;Product Hunt&lt;/strong&gt;: &lt;a class=&#34;link&#34; href=&#34;https://www.producthunt.com/products/insideorg?utm_campaign=producthunt-api&amp;amp;utm_medium=api-v2&amp;amp;utm_source=Application%3A&amp;#43;weekly&amp;#43;%28ID%3A&amp;#43;148189%29&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;View on Product Hunt&lt;/a&gt;&lt;br&gt;
&lt;img src=&#34;https://ph-files.imgix.net/ac26ca21-6d9e-43e8-a867-4771ba68fe20.png?auto=format&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;InsideOrg&#34;
	
	
&gt;&lt;br&gt;
&lt;strong&gt;Keyword&lt;/strong&gt;: organization chart viewer, free org chart, company structure, decision makers, reporting lines, org structure viewer, company domain lookup, organizational chart, business hierarchy&lt;br&gt;
&lt;strong&gt;VotesCount&lt;/strong&gt;: 🔺312&lt;br&gt;
&lt;strong&gt;Featured&lt;/strong&gt;: Yes&lt;br&gt;
&lt;strong&gt;CreatedAt&lt;/strong&gt;: 2026-03-27 07:01 AM (UTC)&lt;/p&gt;
&lt;hr&gt;
&lt;h2 id=&#34;5-cockpit-ai&#34;&gt;5. Cockpit AI
&lt;/h2&gt;&lt;p&gt;&lt;strong&gt;Tagline&lt;/strong&gt;: Run revenue agents across every channel&lt;br&gt;
&lt;strong&gt;Description&lt;/strong&gt;: Deploy AI revenue agents that research prospects, personalize outreach, follow up across channels, and book meetings using your inbox, contacts, docs, and calendar.&lt;br&gt;
&lt;strong&gt;Website&lt;/strong&gt;: &lt;a class=&#34;link&#34; href=&#34;https://www.producthunt.com/r/ZSATDD5TGHHICH?utm_campaign=producthunt-api&amp;amp;utm_medium=api-v2&amp;amp;utm_source=Application%3A&amp;#43;weekly&amp;#43;%28ID%3A&amp;#43;148189%29&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;open&lt;/a&gt;&lt;br&gt;
&lt;strong&gt;Product Hunt&lt;/strong&gt;: &lt;a class=&#34;link&#34; href=&#34;https://www.producthunt.com/products/cockpit-ai?utm_campaign=producthunt-api&amp;amp;utm_medium=api-v2&amp;amp;utm_source=Application%3A&amp;#43;weekly&amp;#43;%28ID%3A&amp;#43;148189%29&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;View on Product Hunt&lt;/a&gt;&lt;br&gt;
&lt;img src=&#34;https://ph-files.imgix.net/b64992a0-5148-4f25-a80b-46dbd3dd57c3.png?auto=format&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;Cockpit AI&#34;
	
	
&gt;&lt;br&gt;
&lt;strong&gt;Keyword&lt;/strong&gt;: AI sales agents, automated outreach, prospect research, personalized follow-up, multichannel engagement, meeting booking, revenue automation, CRM integration, sales automation, AI assistant&lt;br&gt;
&lt;strong&gt;VotesCount&lt;/strong&gt;: 🔺305&lt;br&gt;
&lt;strong&gt;Featured&lt;/strong&gt;: Yes&lt;br&gt;
&lt;strong&gt;CreatedAt&lt;/strong&gt;: 2026-03-27 07:01 AM (UTC)&lt;/p&gt;
&lt;hr&gt;
&lt;h2 id=&#34;6-codex-plugins&#34;&gt;6. Codex Plugins
&lt;/h2&gt;&lt;p&gt;&lt;strong&gt;Tagline&lt;/strong&gt;: Package Codex skills and app integrations as plugins&lt;br&gt;
&lt;strong&gt;Description&lt;/strong&gt;: Codex Plugins package skills, app integrations, and workflows into reusable, installable bundles for teams and developers. Seamlessly connect tools like Slack, Figma, Notion, and Google Drive to streamline planning, research, coding, and post-work workflows. Build, share, and scale consistent workflows across projects with built-in skills, authentication, and integrations.&lt;br&gt;
&lt;strong&gt;Website&lt;/strong&gt;: &lt;a class=&#34;link&#34; href=&#34;https://www.producthunt.com/r/MXXCFXKHV6GFJT?utm_campaign=producthunt-api&amp;amp;utm_medium=api-v2&amp;amp;utm_source=Application%3A&amp;#43;weekly&amp;#43;%28ID%3A&amp;#43;148189%29&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;open&lt;/a&gt;&lt;br&gt;
&lt;strong&gt;Product Hunt&lt;/strong&gt;: &lt;a class=&#34;link&#34; href=&#34;https://www.producthunt.com/products/openai?utm_campaign=producthunt-api&amp;amp;utm_medium=api-v2&amp;amp;utm_source=Application%3A&amp;#43;weekly&amp;#43;%28ID%3A&amp;#43;148189%29&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;View on Product Hunt&lt;/a&gt;&lt;br&gt;
&lt;img src=&#34;https://ph-files.imgix.net/f13f6325-d169-413c-89e7-6b48ccfb45c3.jpeg?auto=format&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;Codex Plugins&#34;
	
	
&gt;&lt;br&gt;
&lt;strong&gt;Keyword&lt;/strong&gt;: plugins, app integrations, workflows, reusable bundles, team collaboration, developer tools, Slack, Figma, Notion, Google Drive, automation, productivity, workflow scaling, Codex skills&lt;br&gt;
&lt;strong&gt;VotesCount&lt;/strong&gt;: 🔺193&lt;br&gt;
&lt;strong&gt;Featured&lt;/strong&gt;: Yes&lt;br&gt;
&lt;strong&gt;CreatedAt&lt;/strong&gt;: 2026-03-27 07:01 AM (UTC)&lt;/p&gt;
&lt;hr&gt;
&lt;h2 id=&#34;7-suno-v55&#34;&gt;7. Suno v5.5
&lt;/h2&gt;&lt;p&gt;&lt;strong&gt;Tagline&lt;/strong&gt;: Create with your voice, tune models to your sound&lt;br&gt;
&lt;strong&gt;Description&lt;/strong&gt;: Suno v5.5 is its most personal music model yet. Use your own voice, train custom models on your catalog, and let My Taste learn what you actually like, so the songs feel less generic and much more like you.&lt;br&gt;
&lt;strong&gt;Website&lt;/strong&gt;: &lt;a class=&#34;link&#34; href=&#34;https://www.producthunt.com/r/S25VKIT6BZUNGW?utm_campaign=producthunt-api&amp;amp;utm_medium=api-v2&amp;amp;utm_source=Application%3A&amp;#43;weekly&amp;#43;%28ID%3A&amp;#43;148189%29&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;open&lt;/a&gt;&lt;br&gt;
&lt;strong&gt;Product Hunt&lt;/strong&gt;: &lt;a class=&#34;link&#34; href=&#34;https://www.producthunt.com/products/suno?utm_campaign=producthunt-api&amp;amp;utm_medium=api-v2&amp;amp;utm_source=Application%3A&amp;#43;weekly&amp;#43;%28ID%3A&amp;#43;148189%29&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;View on Product Hunt&lt;/a&gt;&lt;br&gt;
&lt;img src=&#34;https://ph-files.imgix.net/4e8da565-109e-499f-819b-175853a79d27.jpeg?auto=format&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;Suno v5.5&#34;
	
	
&gt;&lt;br&gt;
&lt;strong&gt;Keyword&lt;/strong&gt;: AI music generator, voice cloning, custom music model, personalized songs, train AI on your voice, music creation, My Taste feature, Suno AI, create music with voice, unique sound&lt;br&gt;
&lt;strong&gt;VotesCount&lt;/strong&gt;: 🔺192&lt;br&gt;
&lt;strong&gt;Featured&lt;/strong&gt;: Yes&lt;br&gt;
&lt;strong&gt;CreatedAt&lt;/strong&gt;: 2026-03-27 07:01 AM (UTC)&lt;/p&gt;
&lt;hr&gt;
&lt;h2 id=&#34;8-stripe-projects&#34;&gt;8. Stripe Projects
&lt;/h2&gt;&lt;p&gt;&lt;strong&gt;Tagline&lt;/strong&gt;: Production-ready dev stack from your terminal&lt;br&gt;
&lt;strong&gt;Description&lt;/strong&gt;: Set up hosting, databases, auth, AI, observability, analytics, and more from the CLI. Stripe Projects gives developers and coding agents a reliable way to provision real services, manage credentials, and keep track of usage across the stack.&lt;br&gt;
&lt;strong&gt;Website&lt;/strong&gt;: &lt;a class=&#34;link&#34; href=&#34;https://www.producthunt.com/r/CNUQWRUNOD3BAQ?utm_campaign=producthunt-api&amp;amp;utm_medium=api-v2&amp;amp;utm_source=Application%3A&amp;#43;weekly&amp;#43;%28ID%3A&amp;#43;148189%29&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;open&lt;/a&gt;&lt;br&gt;
&lt;strong&gt;Product Hunt&lt;/strong&gt;: &lt;a class=&#34;link&#34; href=&#34;https://www.producthunt.com/products/stripe-projects?utm_campaign=producthunt-api&amp;amp;utm_medium=api-v2&amp;amp;utm_source=Application%3A&amp;#43;weekly&amp;#43;%28ID%3A&amp;#43;148189%29&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;View on Product Hunt&lt;/a&gt;&lt;br&gt;
&lt;img src=&#34;https://ph-files.imgix.net/bfb9ebc8-845c-446a-8a7f-cc930c283046.jpeg?auto=format&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;Stripe Projects&#34;
	
	
&gt;&lt;br&gt;
&lt;strong&gt;Keyword&lt;/strong&gt;: Stripe, CLI, developer tools, hosting, databases, authentication, AI, observability, analytics, dev stack, provisioning, infrastructure, coding agents, terminal&lt;br&gt;
&lt;strong&gt;VotesCount&lt;/strong&gt;: 🔺170&lt;br&gt;
&lt;strong&gt;Featured&lt;/strong&gt;: Yes&lt;br&gt;
&lt;strong&gt;CreatedAt&lt;/strong&gt;: 2026-03-27 07:01 AM (UTC)&lt;/p&gt;
&lt;hr&gt;
&lt;h2 id=&#34;9-voxtral-tts-by-mistral-ai&#34;&gt;9. Voxtral TTS by Mistral AI
&lt;/h2&gt;&lt;p&gt;&lt;strong&gt;Tagline&lt;/strong&gt;: Multilingual TTS model with realistic and expressive speech&lt;br&gt;
&lt;strong&gt;Description&lt;/strong&gt;: Voxtral TTS is Mistral AI&amp;rsquo;s first text-to-speech model, delivering state-of-the-art multilingual speech synthesis with realistic, emotionally expressive voices. Low latency, voice cloning, and support for 9 languages make it ideal for scalable voice agents and enterprise workflows.&lt;br&gt;
&lt;strong&gt;Website&lt;/strong&gt;: &lt;a class=&#34;link&#34; href=&#34;https://www.producthunt.com/r/627BPOH5ZBLWMR?utm_campaign=producthunt-api&amp;amp;utm_medium=api-v2&amp;amp;utm_source=Application%3A&amp;#43;weekly&amp;#43;%28ID%3A&amp;#43;148189%29&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;open&lt;/a&gt;&lt;br&gt;
&lt;strong&gt;Product Hunt&lt;/strong&gt;: &lt;a class=&#34;link&#34; href=&#34;https://www.producthunt.com/products/mistral-7b?utm_campaign=producthunt-api&amp;amp;utm_medium=api-v2&amp;amp;utm_source=Application%3A&amp;#43;weekly&amp;#43;%28ID%3A&amp;#43;148189%29&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;View on Product Hunt&lt;/a&gt;&lt;br&gt;
&lt;img src=&#34;https://ph-files.imgix.net/8d9384c0-f31b-4e6d-ac91-317baef56b43.png?auto=format&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;Voxtral TTS by Mistral AI&#34;
	
	
&gt;&lt;br&gt;
&lt;strong&gt;Keyword&lt;/strong&gt;: Voxtral, Mistral AI, TTS, text-to-speech, multilingual, realistic speech, expressive voices, voice cloning, low latency, voice agents, enterprise workflows, scalable&lt;br&gt;
&lt;strong&gt;VotesCount&lt;/strong&gt;: 🔺163&lt;br&gt;
&lt;strong&gt;Featured&lt;/strong&gt;: Yes&lt;br&gt;
&lt;strong&gt;CreatedAt&lt;/strong&gt;: 2026-03-27 07:01 AM (UTC)&lt;/p&gt;
&lt;hr&gt;
&lt;h2 id=&#34;10-audos-publishing-house&#34;&gt;10. Audos Publishing House
&lt;/h2&gt;&lt;p&gt;&lt;strong&gt;Tagline&lt;/strong&gt;: Build an AI business, get up to $100K. No equity taken&lt;br&gt;
&lt;strong&gt;Description&lt;/strong&gt;: Audos Publishing House helps everyday entrepreneurs build million-dollar AI-native businesses with tools, mentorship, and up to $100K in funding - for 0% equity. From the team behind BarkBox and Ro. Now supercharged by the acquisition of No Cap, the world&amp;rsquo;s first AI investor.&lt;br&gt;
&lt;strong&gt;Website&lt;/strong&gt;: &lt;a class=&#34;link&#34; href=&#34;https://www.producthunt.com/r/PJFD3R55EDQXRH?utm_campaign=producthunt-api&amp;amp;utm_medium=api-v2&amp;amp;utm_source=Application%3A&amp;#43;weekly&amp;#43;%28ID%3A&amp;#43;148189%29&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;open&lt;/a&gt;&lt;br&gt;
&lt;strong&gt;Product Hunt&lt;/strong&gt;: &lt;a class=&#34;link&#34; href=&#34;https://www.producthunt.com/products/socap?utm_campaign=producthunt-api&amp;amp;utm_medium=api-v2&amp;amp;utm_source=Application%3A&amp;#43;weekly&amp;#43;%28ID%3A&amp;#43;148189%29&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;View on Product Hunt&lt;/a&gt;&lt;br&gt;
&lt;img src=&#34;https://ph-files.imgix.net/d8412d5e-922d-4936-9cd7-6122c9f04f16.png?auto=format&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;Audos Publishing House&#34;
	
	
&gt;&lt;br&gt;
&lt;strong&gt;Keyword&lt;/strong&gt;: AI business funding, no equity, AI startup funding, entrepreneur tools, mentorship, Audos Publishing House, AI investor, build AI business, $100K funding, zero equity, AI-native business, No Cap acquisition, startup capital&lt;br&gt;
&lt;strong&gt;VotesCount&lt;/strong&gt;: 🔺155&lt;br&gt;
&lt;strong&gt;Featured&lt;/strong&gt;: Yes&lt;br&gt;
&lt;strong&gt;CreatedAt&lt;/strong&gt;: 2026-03-27 07:01 AM (UTC)&lt;/p&gt;
&lt;hr&gt;
</description>
        </item>
        <item>
        <title>ebook2audiobook</title>
        <link>https://producthunt.programnotes.cn/en/p/ebook2audiobook/</link>
        <pubDate>Wed, 22 Oct 2025 15:28:41 +0800</pubDate>
        
        <guid>https://producthunt.programnotes.cn/en/p/ebook2audiobook/</guid>
        <description>&lt;img src="https://images.unsplash.com/photo-1708000590735-6aee991a7b29?ixid=M3w0NjAwMjJ8MHwxfHJhbmRvbXx8fHx8fHx8fDE3NjExMTgwNzZ8&amp;ixlib=rb-4.1.0" alt="Featured image of post ebook2audiobook" /&gt;&lt;h1 id=&#34;drewthomassonebook2audiobook&#34;&gt;&lt;a class=&#34;link&#34; href=&#34;https://github.com/DrewThomasson/ebook2audiobook&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;DrewThomasson/ebook2audiobook&lt;/a&gt;
&lt;/h1&gt;&lt;h1 id=&#34;-ebook2audiobook&#34;&gt;📚 ebook2audiobook
&lt;/h1&gt;&lt;p&gt;CPU/GPU Converter from eBooks to audiobooks with chapters and metadata&lt;br/&gt;
using XTTSv2, Bark, Vits, Fairseq, YourTTS, Tacotron and more. Supports voice cloning and +1110 languages!&lt;/p&gt;
&lt;blockquote&gt;
&lt;p&gt;[!IMPORTANT]
&lt;strong&gt;This tool is intended for use with non-DRM, legally acquired eBooks only.&lt;/strong&gt; &lt;br&gt;
The authors are not responsible for any misuse of this software or any resulting legal consequences. &lt;br&gt;
Use this tool responsibly and in accordance with all applicable laws.&lt;/p&gt;
&lt;/blockquote&gt;
&lt;p&gt;&lt;a class=&#34;link&#34; href=&#34;https://discord.gg/63Tv3F65k6&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;&lt;img src=&#34;https://dcbadge.limes.pink/api/server/https://discord.gg/63Tv3F65k6&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;Discord&#34;
	
	
&gt;&lt;/a&gt;&lt;/p&gt;
&lt;h3 id=&#34;thanks-to-support-ebook2audiobook-developers&#34;&gt;Thanks for supporting the ebook2audiobook developers!
&lt;/h3&gt;&lt;p&gt;&lt;a class=&#34;link&#34; href=&#34;https://ko-fi.com/athomasson2&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;&lt;img src=&#34;https://img.shields.io/badge/Ko--fi-F16061?style=for-the-badge&amp;amp;logo=ko-fi&amp;amp;logoColor=white&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;Ko-Fi&#34;
	
	
&gt;&lt;/a&gt;&lt;/p&gt;
&lt;h3 id=&#34;run-locally&#34;&gt;Run locally
&lt;/h3&gt;&lt;p&gt;&lt;a class=&#34;link&#34; href=&#34;#launching-gradio-web-interface&#34; &gt;&lt;img src=&#34;https://img.shields.io/badge/Quick%20Start-blue?style=for-the-badge&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;Quick Start&#34;
	
	
&gt;&lt;/a&gt;&lt;/p&gt;
&lt;p&gt;&lt;a class=&#34;link&#34; href=&#34;https://github.com/DrewThomasson/ebook2audiobook/actions/workflows/Docker-Build.yml&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;&lt;img src=&#34;https://github.com/DrewThomasson/ebook2audiobook/actions/workflows/Docker-Build.yml/badge.svg&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;Docker Build&#34;
	
	
&gt;&lt;/a&gt;  &lt;a class=&#34;link&#34; href=&#34;https://github.com/DrewThomasson/ebook2audiobook/releases/latest&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;&lt;img src=&#34;https://img.shields.io/badge/Download-Now-blue.svg&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;Download&#34;
	
	
&gt;&lt;/a&gt;&lt;/p&gt;
&lt;a href=&#34;https://github.com/DrewThomasson/ebook2audiobook&#34;&gt;
  &lt;img src=&#34;https://img.shields.io/badge/Platform-mac%20|%20linux%20|%20windows-lightgrey&#34; alt=&#34;Platform&#34;&gt;
&lt;/a&gt;&lt;a href=&#34;https://hub.docker.com/r/athomasson2/ebook2audiobook&#34;&gt;
&lt;img alt=&#34;Docker Pull Count&#34; src=&#34;https://img.shields.io/docker/pulls/athomasson2/ebook2audiobook.svg&#34;/&gt;
&lt;/a&gt;
&lt;h3 id=&#34;run-remotely&#34;&gt;Run Remotely
&lt;/h3&gt;&lt;p&gt;&lt;a class=&#34;link&#34; href=&#34;https://huggingface.co/spaces/drewThomasson/ebook2audiobook&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;&lt;img src=&#34;https://img.shields.io/badge/Hugging%20Face-Spaces-yellow?style=flat&amp;amp;logo=huggingface&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;Hugging Face&#34;
	
	
&gt;&lt;/a&gt;
&lt;a class=&#34;link&#34; href=&#34;https://colab.research.google.com/github/DrewThomasson/ebook2audiobook/blob/main/Notebooks/colab_ebook2audiobook.ipynb&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;&lt;img src=&#34;https://colab.research.google.com/assets/colab-badge.svg&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;Free Google Colab&#34;
	
	
&gt;&lt;/a&gt; &lt;a class=&#34;link&#34; href=&#34;https://github.com/Rihcus/ebook2audiobookXTTS/blob/main/Notebooks/kaggle-ebook2audiobook.ipynb&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;&lt;img src=&#34;https://img.shields.io/badge/Kaggle-035a7d?style=flat&amp;amp;logo=kaggle&amp;amp;logoColor=white&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;Kaggle&#34;
	
	
&gt;&lt;/a&gt;&lt;/p&gt;
&lt;h4 id=&#34;gui-interface&#34;&gt;GUI Interface
&lt;/h4&gt;&lt;p&gt;&lt;img src=&#34;https://producthunt.programnotes.cn/assets/demo_web_gui.gif&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;demo_web_gui&#34;
	
	
&gt;&lt;/p&gt;
&lt;details&gt;
  &lt;summary&gt;Click to see images of Web GUI&lt;/summary&gt;
  &lt;img width=&#34;1728&#34; alt=&#34;GUI Screen 1&#34; src=&#34;assets/gui_1.png&#34;&gt;
  &lt;img width=&#34;1728&#34; alt=&#34;GUI Screen 2&#34; src=&#34;assets/gui_2.png&#34;&gt;
  &lt;img width=&#34;1728&#34; alt=&#34;GUI Screen 3&#34; src=&#34;assets/gui_3.png&#34;&gt;
&lt;/details&gt;
&lt;h2 id=&#34;demos&#34;&gt;Demos
&lt;/h2&gt;&lt;p&gt;&lt;strong&gt;New Default Voice Demo&lt;/strong&gt;&lt;/p&gt;
&lt;p&gt;&lt;a class=&#34;link&#34; href=&#34;https://github.com/user-attachments/assets/750035dc-e355-46f1-9286-05c1d9e88cea&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;https://github.com/user-attachments/assets/750035dc-e355-46f1-9286-05c1d9e88cea&lt;/a&gt;&lt;/p&gt;
&lt;details&gt;
  &lt;summary&gt;More Demos&lt;/summary&gt;
&lt;p&gt;&lt;strong&gt;ASMR Voice&lt;/strong&gt;&lt;/p&gt;
&lt;p&gt;&lt;a class=&#34;link&#34; href=&#34;https://github.com/user-attachments/assets/68eee9a1-6f71-4903-aacd-47397e47e422&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;https://github.com/user-attachments/assets/68eee9a1-6f71-4903-aacd-47397e47e422&lt;/a&gt;&lt;/p&gt;
&lt;p&gt;&lt;strong&gt;Rainy Day Voice&lt;/strong&gt;&lt;/p&gt;
&lt;p&gt;&lt;a class=&#34;link&#34; href=&#34;https://github.com/user-attachments/assets/d25034d9-c77f-43a9-8f14-0d167172b080&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;https://github.com/user-attachments/assets/d25034d9-c77f-43a9-8f14-0d167172b080&lt;/a&gt;&lt;/p&gt;
&lt;p&gt;&lt;strong&gt;Scarlett Voice&lt;/strong&gt;&lt;/p&gt;
&lt;p&gt;&lt;a class=&#34;link&#34; href=&#34;https://github.com/user-attachments/assets/b12009ee-ec0d-45ce-a1ef-b3a52b9f8693&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;https://github.com/user-attachments/assets/b12009ee-ec0d-45ce-a1ef-b3a52b9f8693&lt;/a&gt;&lt;/p&gt;
&lt;p&gt;&lt;strong&gt;David Attenborough Voice&lt;/strong&gt;&lt;/p&gt;
&lt;p&gt;&lt;a class=&#34;link&#34; href=&#34;https://github.com/user-attachments/assets/81c4baad-117e-4db5-ac86-efc2b7fea921&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;https://github.com/user-attachments/assets/81c4baad-117e-4db5-ac86-efc2b7fea921&lt;/a&gt;&lt;/p&gt;
&lt;p&gt;&lt;strong&gt;Example&lt;/strong&gt;&lt;/p&gt;
&lt;p&gt;&lt;img src=&#34;https://github.com/DrewThomasson/VoxNovel/blob/dc5197dff97252fa44c391dc0596902d71278a88/readme_files/example_in_app.jpeg&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;Example&#34;
	
	
&gt;&lt;/p&gt;
&lt;/details&gt;
&lt;h2 id=&#34;readmemd&#34;&gt;README.md
&lt;/h2&gt;&lt;h2 id=&#34;table-of-contents&#34;&gt;Table of Contents
&lt;/h2&gt;&lt;ul&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#-ebook2audiobook&#34; &gt;ebook2audiobook&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#features&#34; &gt;Features&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#gui-interface&#34; &gt;GUI Interface&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#demos&#34; &gt;Demos&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#supported-languages&#34; &gt;Supported Languages&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#hardware-requirements&#34; &gt;Minimum Requirements&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#launching-gradio-web-interface&#34; &gt;Usage&lt;/a&gt;
&lt;ul&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#launching-gradio-web-interface&#34; &gt;Run Locally&lt;/a&gt;
&lt;ul&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#launching-gradio-web-interface&#34; &gt;Launching Gradio Web Interface&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#basic--usage&#34; &gt;Basic Headless Usage&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#example-of-custom-model-zip-upload&#34; &gt;Headless Custom XTTS Model Usage&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#help-command-output&#34; &gt;Help command output&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#run-remotely&#34; &gt;Run Remotely&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#fine-tuned-tts-models&#34; &gt;Fine Tuned TTS models&lt;/a&gt;
&lt;ul&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#fine-tuned-tts-collection&#34; &gt;Collection of Fine-Tuned TTS Models&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#fine-tune-your-own-xttsv2-model&#34; &gt;Train XTTSv2&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#docker-gpu-options&#34; &gt;Docker&lt;/a&gt;
&lt;ul&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#docker-gpu-options&#34; &gt;GPU options&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#running-the-pre-built-docker-container&#34; &gt;Docker Run&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#building-the-docker-container&#34; &gt;Docker Build&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#docker-compose&#34; &gt;Docker Compose&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#docker-headless-guide&#34; &gt;Docker headless guide&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#docker-container-file-locations&#34; &gt;Docker container file locations&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#common-docker-issues&#34; &gt;Common Docker issues&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#supported-ebook-formats&#34; &gt;Supported eBook Formats&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#output-formats&#34; &gt;Output Formats&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#updating-to-latest-version&#34; &gt;Updating to Latest Version&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#reverting-to-older-versions&#34; &gt;Revert to older Version&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#common-issues&#34; &gt;Common Issues&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#special-thanks&#34; &gt;Special Thanks&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#table-of-contents&#34; &gt;Table of Contents&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;h2 id=&#34;features&#34;&gt;Features
&lt;/h2&gt;&lt;ul&gt;
&lt;li&gt;📚 Splits eBook into chapters for organized audio.&lt;/li&gt;
&lt;li&gt;🎙️ High-quality text-to-speech with &lt;a class=&#34;link&#34; href=&#34;https://huggingface.co/coqui/XTTS-v2&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;Coqui XTTSv2&lt;/a&gt; and &lt;a class=&#34;link&#34; href=&#34;https://github.com/facebookresearch/fairseq/tree/main/examples/mms&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;Fairseq&lt;/a&gt; (and more).&lt;/li&gt;
&lt;li&gt;🗣️ Optional voice cloning with your own voice file.&lt;/li&gt;
&lt;li&gt;🌍 Supports +1110 languages (English by default). &lt;a class=&#34;link&#34; href=&#34;https://dl.fbaipublicfiles.com/mms/tts/all-tts-languages.html&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;List of Supported languages&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;🖥️ Designed to run on 4GB RAM.&lt;/li&gt;
&lt;/ul&gt;
&lt;h2 id=&#34;supported-languages&#34;&gt;Supported Languages
&lt;/h2&gt;&lt;table&gt;
  &lt;thead&gt;
      &lt;tr&gt;
          &lt;th style=&#34;text-align: center&#34;&gt;&lt;strong&gt;Arabic (ar)&lt;/strong&gt;&lt;/th&gt;
          &lt;th style=&#34;text-align: center&#34;&gt;&lt;strong&gt;Chinese (zh)&lt;/strong&gt;&lt;/th&gt;
          &lt;th style=&#34;text-align: center&#34;&gt;&lt;strong&gt;English (en)&lt;/strong&gt;&lt;/th&gt;
          &lt;th style=&#34;text-align: center&#34;&gt;&lt;strong&gt;Spanish (es)&lt;/strong&gt;&lt;/th&gt;
      &lt;/tr&gt;
  &lt;/thead&gt;
  &lt;tbody&gt;
      &lt;tr&gt;
          &lt;td style=&#34;text-align: center&#34;&gt;&lt;strong&gt;French (fr)&lt;/strong&gt;&lt;/td&gt;
          &lt;td style=&#34;text-align: center&#34;&gt;&lt;strong&gt;German (de)&lt;/strong&gt;&lt;/td&gt;
          &lt;td style=&#34;text-align: center&#34;&gt;&lt;strong&gt;Italian (it)&lt;/strong&gt;&lt;/td&gt;
          &lt;td style=&#34;text-align: center&#34;&gt;&lt;strong&gt;Portuguese (pt)&lt;/strong&gt;&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td style=&#34;text-align: center&#34;&gt;&lt;strong&gt;Polish (pl)&lt;/strong&gt;&lt;/td&gt;
          &lt;td style=&#34;text-align: center&#34;&gt;&lt;strong&gt;Turkish (tr)&lt;/strong&gt;&lt;/td&gt;
          &lt;td style=&#34;text-align: center&#34;&gt;&lt;strong&gt;Russian (ru)&lt;/strong&gt;&lt;/td&gt;
          &lt;td style=&#34;text-align: center&#34;&gt;&lt;strong&gt;Dutch (nl)&lt;/strong&gt;&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td style=&#34;text-align: center&#34;&gt;&lt;strong&gt;Czech (cs)&lt;/strong&gt;&lt;/td&gt;
          &lt;td style=&#34;text-align: center&#34;&gt;&lt;strong&gt;Japanese (ja)&lt;/strong&gt;&lt;/td&gt;
          &lt;td style=&#34;text-align: center&#34;&gt;&lt;strong&gt;Hindi (hi)&lt;/strong&gt;&lt;/td&gt;
          &lt;td style=&#34;text-align: center&#34;&gt;&lt;strong&gt;Bengali (bn)&lt;/strong&gt;&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td style=&#34;text-align: center&#34;&gt;&lt;strong&gt;Hungarian (hu)&lt;/strong&gt;&lt;/td&gt;
          &lt;td style=&#34;text-align: center&#34;&gt;&lt;strong&gt;Korean (ko)&lt;/strong&gt;&lt;/td&gt;
          &lt;td style=&#34;text-align: center&#34;&gt;&lt;strong&gt;Vietnamese (vi)&lt;/strong&gt;&lt;/td&gt;
          &lt;td style=&#34;text-align: center&#34;&gt;&lt;strong&gt;Swedish (sv)&lt;/strong&gt;&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td style=&#34;text-align: center&#34;&gt;&lt;strong&gt;Persian (fa)&lt;/strong&gt;&lt;/td&gt;
          &lt;td style=&#34;text-align: center&#34;&gt;&lt;strong&gt;Yoruba (yo)&lt;/strong&gt;&lt;/td&gt;
          &lt;td style=&#34;text-align: center&#34;&gt;&lt;strong&gt;Swahili (sw)&lt;/strong&gt;&lt;/td&gt;
          &lt;td style=&#34;text-align: center&#34;&gt;&lt;strong&gt;Indonesian (id)&lt;/strong&gt;&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td style=&#34;text-align: center&#34;&gt;&lt;strong&gt;Slovak (sk)&lt;/strong&gt;&lt;/td&gt;
          &lt;td style=&#34;text-align: center&#34;&gt;&lt;strong&gt;Croatian (hr)&lt;/strong&gt;&lt;/td&gt;
          &lt;td style=&#34;text-align: center&#34;&gt;&lt;strong&gt;Tamil (ta)&lt;/strong&gt;&lt;/td&gt;
          &lt;td style=&#34;text-align: center&#34;&gt;&lt;strong&gt;Danish (da)&lt;/strong&gt;&lt;/td&gt;
      &lt;/tr&gt;
  &lt;/tbody&gt;
&lt;/table&gt;
&lt;ul&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;https://dl.fbaipublicfiles.com/mms/tts/all-tts-languages.html&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;&lt;strong&gt;+1100 languages and dialects here&lt;/strong&gt;&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;h2 id=&#34;hardware-requirements&#34;&gt;Hardware Requirements
&lt;/h2&gt;&lt;ul&gt;
&lt;li&gt;4GB RAM minimum, 8GB recommended&lt;/li&gt;
&lt;li&gt;Virtualization enabled if running on Windows (Docker only)&lt;/li&gt;
&lt;li&gt;CPU (intel, AMD, ARM), GPU (Nvidia, AMD*, Intel*) (Recommended), MPS (Apple Silicon CPU)
*available very soon&lt;/li&gt;
&lt;/ul&gt;
&lt;blockquote&gt;
&lt;p&gt;[!IMPORTANT]
&lt;strong&gt;Before posting an install or bug issue, search carefully through the opened and closed issues tab&lt;br&gt;
to be sure your issue does not already exist.&lt;/strong&gt;&lt;/p&gt;
&lt;/blockquote&gt;
&lt;blockquote&gt;
&lt;p&gt;[!NOTE]
&lt;strong&gt;Since eBooks lack any standard structure defining what is a chapter, paragraph, preface etc.,&lt;br&gt;
you should first manually remove any text you don&amp;rsquo;t want converted to audio.&lt;/strong&gt;&lt;/p&gt;
&lt;/blockquote&gt;
&lt;h3 id=&#34;installation-instructions&#34;&gt;Installation Instructions
&lt;/h3&gt;&lt;ol&gt;
&lt;li&gt;&lt;strong&gt;Clone repo&lt;/strong&gt;&lt;/li&gt;
&lt;/ol&gt;
&lt;div class=&#34;highlight&#34;&gt;&lt;div class=&#34;chroma&#34;&gt;
&lt;table class=&#34;lntable&#34;&gt;&lt;tr&gt;&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code&gt;&lt;span class=&#34;lnt&#34;&gt;1
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;2
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;
&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code class=&#34;language-bash&#34; data-lang=&#34;bash&#34;&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;git clone https://github.com/DrewThomasson/ebook2audiobook.git
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;&lt;span class=&#34;nb&#34;&gt;cd&lt;/span&gt; ebook2audiobook
&lt;/span&gt;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;
&lt;/div&gt;
&lt;/div&gt;&lt;h3 id=&#34;launching-gradio-web-interface&#34;&gt;Launching Gradio Web Interface
&lt;/h3&gt;&lt;ol&gt;
&lt;li&gt;
&lt;p&gt;&lt;strong&gt;Run ebook2audiobook&lt;/strong&gt;:&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;
&lt;p&gt;&lt;strong&gt;Linux/MacOS&lt;/strong&gt;&lt;/p&gt;
&lt;div class=&#34;highlight&#34;&gt;&lt;div class=&#34;chroma&#34;&gt;
&lt;table class=&#34;lntable&#34;&gt;&lt;tr&gt;&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code&gt;&lt;span class=&#34;lnt&#34;&gt;1
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;
&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code class=&#34;language-bash&#34; data-lang=&#34;bash&#34;&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;./ebook2audiobook.sh  &lt;span class=&#34;c1&#34;&gt;# Run launch script&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;
&lt;/div&gt;
&lt;/div&gt;&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;&lt;strong&gt;Mac Launcher&lt;/strong&gt;&lt;br&gt;
Double click &lt;code&gt;Mac Ebook2Audiobook Launcher.command&lt;/code&gt;&lt;/p&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;&lt;strong&gt;Windows&lt;/strong&gt;&lt;/p&gt;
&lt;div class=&#34;highlight&#34;&gt;&lt;div class=&#34;chroma&#34;&gt;
&lt;table class=&#34;lntable&#34;&gt;&lt;tr&gt;&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code&gt;&lt;span class=&#34;lnt&#34;&gt;1
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;
&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code class=&#34;language-bash&#34; data-lang=&#34;bash&#34;&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;ebook2audiobook.cmd  &lt;span class=&#34;c1&#34;&gt;# Run launch script or double click on it&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;
&lt;/div&gt;
&lt;/div&gt;&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;&lt;strong&gt;Windows Launcher&lt;/strong&gt;&lt;br&gt;
Double click &lt;code&gt;ebook2audiobook.cmd&lt;/code&gt;&lt;/p&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;&lt;strong&gt;Manual Python Install&lt;/strong&gt;&lt;/p&gt;
&lt;div class=&#34;highlight&#34;&gt;&lt;div class=&#34;chroma&#34;&gt;
&lt;table class=&#34;lntable&#34;&gt;&lt;tr&gt;&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code&gt;&lt;span class=&#34;lnt&#34;&gt;1
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;2
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;3
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;4
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;5
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;
&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code class=&#34;language-bash&#34; data-lang=&#34;bash&#34;&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;&lt;span class=&#34;c1&#34;&gt;# (for experts only!)&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;&lt;span class=&#34;nv&#34;&gt;REQUIRED_PROGRAMS&lt;/span&gt;&lt;span class=&#34;o&#34;&gt;=(&lt;/span&gt;&lt;span class=&#34;s2&#34;&gt;&amp;#34;calibre&amp;#34;&lt;/span&gt; &lt;span class=&#34;s2&#34;&gt;&amp;#34;ffmpeg&amp;#34;&lt;/span&gt; &lt;span class=&#34;s2&#34;&gt;&amp;#34;nodejs&amp;#34;&lt;/span&gt; &lt;span class=&#34;s2&#34;&gt;&amp;#34;mecab&amp;#34;&lt;/span&gt; &lt;span class=&#34;s2&#34;&gt;&amp;#34;espeak-ng&amp;#34;&lt;/span&gt; &lt;span class=&#34;s2&#34;&gt;&amp;#34;rust&amp;#34;&lt;/span&gt; &lt;span class=&#34;s2&#34;&gt;&amp;#34;sox&amp;#34;&lt;/span&gt;&lt;span class=&#34;o&#34;&gt;)&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;&lt;span class=&#34;nv&#34;&gt;REQUIRED_PYTHON_VERSION&lt;/span&gt;&lt;span class=&#34;o&#34;&gt;=&lt;/span&gt;&lt;span class=&#34;s2&#34;&gt;&amp;#34;3.12&amp;#34;&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;pip install -r requirements.txt  &lt;span class=&#34;c1&#34;&gt;# Install Python Requirements&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;python app.py  &lt;span class=&#34;c1&#34;&gt;# Run Ebook2Audiobook&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;
&lt;/div&gt;
&lt;/div&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;&lt;strong&gt;Open the Web App&lt;/strong&gt;: Click the URL provided in the terminal to access the web app and convert eBooks. &lt;code&gt;http://localhost:7860/&lt;/code&gt;&lt;/p&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;&lt;strong&gt;For Public Link&lt;/strong&gt;:
&lt;code&gt;python app.py --share&lt;/code&gt; (all OS)
&lt;code&gt;./ebook2audiobook.sh --share&lt;/code&gt; (Linux/MacOS)
&lt;code&gt;ebook2audiobook.cmd --share&lt;/code&gt; (Windows)&lt;/p&gt;
&lt;/li&gt;
&lt;/ol&gt;
&lt;blockquote&gt;
&lt;p&gt;[!IMPORTANT]
&lt;strong&gt;If the script is stopped and run again, you need to refresh your gradio GUI interface&lt;br&gt;
to let the web page reconnect to the new connection socket.&lt;/strong&gt;&lt;/p&gt;
&lt;/blockquote&gt;
&lt;h3 id=&#34;basic--usage&#34;&gt;Basic Usage
&lt;/h3&gt;&lt;ul&gt;
&lt;li&gt;
&lt;p&gt;&lt;strong&gt;Linux/MacOS&lt;/strong&gt;:&lt;/p&gt;
&lt;div class=&#34;highlight&#34;&gt;&lt;div class=&#34;chroma&#34;&gt;
&lt;table class=&#34;lntable&#34;&gt;&lt;tr&gt;&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code&gt;&lt;span class=&#34;lnt&#34;&gt;1
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;2
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;
&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code class=&#34;language-bash&#34; data-lang=&#34;bash&#34;&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;./ebook2audiobook.sh --headless --ebook &amp;lt;path_to_ebook_file&amp;gt; &lt;span class=&#34;se&#34;&gt;\
&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;    --voice &lt;span class=&#34;o&#34;&gt;[&lt;/span&gt;path_to_voice_file&lt;span class=&#34;o&#34;&gt;]&lt;/span&gt; --language &lt;span class=&#34;o&#34;&gt;[&lt;/span&gt;language_code&lt;span class=&#34;o&#34;&gt;]&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;
&lt;/div&gt;
&lt;/div&gt;&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;&lt;strong&gt;Windows&lt;/strong&gt;&lt;/p&gt;
&lt;div class=&#34;highlight&#34;&gt;&lt;div class=&#34;chroma&#34;&gt;
&lt;table class=&#34;lntable&#34;&gt;&lt;tr&gt;&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code&gt;&lt;span class=&#34;lnt&#34;&gt;1
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;2
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;
&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code class=&#34;language-bash&#34; data-lang=&#34;bash&#34;&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;ebook2audiobook.cmd --headless --ebook &amp;lt;path_to_ebook_file&amp;gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;    --voice &lt;span class=&#34;o&#34;&gt;[&lt;/span&gt;path_to_voice_file&lt;span class=&#34;o&#34;&gt;]&lt;/span&gt; --language &lt;span class=&#34;o&#34;&gt;[&lt;/span&gt;language_code&lt;span class=&#34;o&#34;&gt;]&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;
&lt;/div&gt;
&lt;/div&gt;&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;&lt;strong&gt;[--ebook]&lt;/strong&gt;: Path to your eBook file&lt;/p&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;&lt;strong&gt;[--voice]&lt;/strong&gt;: Voice cloning file path (optional)&lt;/p&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;&lt;strong&gt;[--language]&lt;/strong&gt;: Language code in ISO-639-3 (e.g.: ita for Italian, eng for English, deu for German&amp;hellip;).&lt;br&gt;
Default language is eng and --language is optional for the default language set in ./lib/lang.py.&lt;br&gt;
The ISO-639-1 two-letter codes are also supported.&lt;/p&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;h3 id=&#34;example-of-custom-model-zip-upload&#34;&gt;Example of Custom Model Zip Upload
&lt;/h3&gt;&lt;p&gt;(must be a .zip file containing the mandatory model files. Example for XTTSv2: config.json, model.pth, vocab.json and ref.wav)&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;&lt;strong&gt;Linux/MacOS&lt;/strong&gt;
&lt;div class=&#34;highlight&#34;&gt;&lt;div class=&#34;chroma&#34;&gt;
&lt;table class=&#34;lntable&#34;&gt;&lt;tr&gt;&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code&gt;&lt;span class=&#34;lnt&#34;&gt;1
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;2
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;
&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code class=&#34;language-bash&#34; data-lang=&#34;bash&#34;&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;./ebook2audiobook.sh --headless --ebook &amp;lt;ebook_file_path&amp;gt; &lt;span class=&#34;se&#34;&gt;\
&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;    --voice &amp;lt;target_voice_file_path&amp;gt; --language &amp;lt;language&amp;gt; --custom_model &amp;lt;custom_model_path&amp;gt;
&lt;/span&gt;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;
&lt;/div&gt;
&lt;/div&gt;&lt;/li&gt;
&lt;li&gt;&lt;strong&gt;Windows&lt;/strong&gt;
&lt;div class=&#34;highlight&#34;&gt;&lt;div class=&#34;chroma&#34;&gt;
&lt;table class=&#34;lntable&#34;&gt;&lt;tr&gt;&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code&gt;&lt;span class=&#34;lnt&#34;&gt;1
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;2
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;
&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code class=&#34;language-bash&#34; data-lang=&#34;bash&#34;&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;ebook2audiobook.cmd --headless --ebook &amp;lt;ebook_file_path&amp;gt; &lt;span class=&#34;se&#34;&gt;\
&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;    --voice &amp;lt;target_voice_file_path&amp;gt; --language &amp;lt;language&amp;gt; --custom_model &amp;lt;custom_model_path&amp;gt;
&lt;/span&gt;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;
&lt;/div&gt;
&lt;/div&gt;&lt;/li&gt;
&lt;li&gt;&lt;strong&gt;&amp;lt;custom_model_path&amp;gt;&lt;/strong&gt;: Path to &lt;code&gt;model_name.zip&lt;/code&gt; file,
which must contain (according to the tts engine) all the mandatory files&lt;br&gt;
(see ./lib/models.py).&lt;/li&gt;
&lt;/ul&gt;
&lt;h3 id=&#34;for-detailed-guide-with-list-of-all-parameters-to-use&#34;&gt;For Detailed Guide with list of all Parameters to use
&lt;/h3&gt;&lt;ul&gt;
&lt;li&gt;&lt;strong&gt;Linux/MacOS&lt;/strong&gt;
&lt;div class=&#34;highlight&#34;&gt;&lt;div class=&#34;chroma&#34;&gt;
&lt;table class=&#34;lntable&#34;&gt;&lt;tr&gt;&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code&gt;&lt;span class=&#34;lnt&#34;&gt;1
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;
&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code class=&#34;language-bash&#34; data-lang=&#34;bash&#34;&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;./ebook2audiobook.sh --help
&lt;/span&gt;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;
&lt;/div&gt;
&lt;/div&gt;&lt;/li&gt;
&lt;li&gt;&lt;strong&gt;Windows&lt;/strong&gt;
&lt;div class=&#34;highlight&#34;&gt;&lt;div class=&#34;chroma&#34;&gt;
&lt;table class=&#34;lntable&#34;&gt;&lt;tr&gt;&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code&gt;&lt;span class=&#34;lnt&#34;&gt;1
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;
&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code class=&#34;language-bash&#34; data-lang=&#34;bash&#34;&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;ebook2audiobook.cmd --help
&lt;/span&gt;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;
&lt;/div&gt;
&lt;/div&gt;&lt;/li&gt;
&lt;li&gt;&lt;strong&gt;Or for all OS&lt;/strong&gt;
&lt;code&gt;python app.py --help &lt;/code&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;a id=&#34;help-command-output&#34;&gt;&lt;/a&gt;&lt;/p&gt;
&lt;div class=&#34;highlight&#34;&gt;&lt;div class=&#34;chroma&#34;&gt;
&lt;table class=&#34;lntable&#34;&gt;&lt;tr&gt;&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code&gt;&lt;span class=&#34;lnt&#34;&gt; 1
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt; 2
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt; 3
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt; 4
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt; 5
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt; 6
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt; 7
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt; 8
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt; 9
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;10
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;11
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;12
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;13
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;14
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;15
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;16
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;17
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;18
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;19
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;20
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;21
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;22
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;23
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;24
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;25
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;26
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;27
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;28
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;29
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;30
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;31
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;32
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;33
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;34
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;35
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;36
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;37
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;38
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;39
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;40
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;41
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;42
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;43
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;44
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;45
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;46
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;47
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;48
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;49
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;50
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;51
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;52
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;53
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;54
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;55
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;56
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;57
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;58
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;59
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;60
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;61
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;62
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;63
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;64
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;65
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;66
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;67
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;68
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;69
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;70
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;71
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;72
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;73
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;74
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;75
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;76
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;77
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;78
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;79
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;80
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;81
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;82
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;83
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;84
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;85
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;86
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;87
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;88
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;89
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;90
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;91
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;92
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;93
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;94
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;95
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;96
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;97
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;
&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code class=&#34;language-bash&#34; data-lang=&#34;bash&#34;&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;usage: app.py &lt;span class=&#34;o&#34;&gt;[&lt;/span&gt;-h&lt;span class=&#34;o&#34;&gt;]&lt;/span&gt; &lt;span class=&#34;o&#34;&gt;[&lt;/span&gt;--session SESSION&lt;span class=&#34;o&#34;&gt;]&lt;/span&gt; &lt;span class=&#34;o&#34;&gt;[&lt;/span&gt;--share&lt;span class=&#34;o&#34;&gt;]&lt;/span&gt; &lt;span class=&#34;o&#34;&gt;[&lt;/span&gt;--headless&lt;span class=&#34;o&#34;&gt;]&lt;/span&gt; &lt;span class=&#34;o&#34;&gt;[&lt;/span&gt;--ebook EBOOK&lt;span class=&#34;o&#34;&gt;]&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;              &lt;span class=&#34;o&#34;&gt;[&lt;/span&gt;--ebooks_dir EBOOKS_DIR&lt;span class=&#34;o&#34;&gt;]&lt;/span&gt; &lt;span class=&#34;o&#34;&gt;[&lt;/span&gt;--language LANGUAGE&lt;span class=&#34;o&#34;&gt;]&lt;/span&gt; &lt;span class=&#34;o&#34;&gt;[&lt;/span&gt;--voice VOICE&lt;span class=&#34;o&#34;&gt;]&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;              &lt;span class=&#34;o&#34;&gt;[&lt;/span&gt;--device &lt;span class=&#34;o&#34;&gt;{&lt;/span&gt;cpu,gpu,mps&lt;span class=&#34;o&#34;&gt;}]&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;              &lt;span class=&#34;o&#34;&gt;[&lt;/span&gt;--tts_engine &lt;span class=&#34;o&#34;&gt;{&lt;/span&gt;XTTSv2,BARK,VITS,FAIRSEQ,TACOTRON2,YOURTTS,xtts,bark,vits,fairseq,tacotron,yourtts&lt;span class=&#34;o&#34;&gt;}]&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;              &lt;span class=&#34;o&#34;&gt;[&lt;/span&gt;--custom_model CUSTOM_MODEL&lt;span class=&#34;o&#34;&gt;]&lt;/span&gt; &lt;span class=&#34;o&#34;&gt;[&lt;/span&gt;--fine_tuned FINE_TUNED&lt;span class=&#34;o&#34;&gt;]&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;              &lt;span class=&#34;o&#34;&gt;[&lt;/span&gt;--output_format OUTPUT_FORMAT&lt;span class=&#34;o&#34;&gt;]&lt;/span&gt; &lt;span class=&#34;o&#34;&gt;[&lt;/span&gt;--temperature TEMPERATURE&lt;span class=&#34;o&#34;&gt;]&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;              &lt;span class=&#34;o&#34;&gt;[&lt;/span&gt;--length_penalty LENGTH_PENALTY&lt;span class=&#34;o&#34;&gt;]&lt;/span&gt; &lt;span class=&#34;o&#34;&gt;[&lt;/span&gt;--num_beams NUM_BEAMS&lt;span class=&#34;o&#34;&gt;]&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;              &lt;span class=&#34;o&#34;&gt;[&lt;/span&gt;--repetition_penalty REPETITION_PENALTY&lt;span class=&#34;o&#34;&gt;]&lt;/span&gt; &lt;span class=&#34;o&#34;&gt;[&lt;/span&gt;--top_k TOP_K&lt;span class=&#34;o&#34;&gt;]&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;              &lt;span class=&#34;o&#34;&gt;[&lt;/span&gt;--top_p TOP_P&lt;span class=&#34;o&#34;&gt;]&lt;/span&gt; &lt;span class=&#34;o&#34;&gt;[&lt;/span&gt;--speed SPEED&lt;span class=&#34;o&#34;&gt;]&lt;/span&gt; &lt;span class=&#34;o&#34;&gt;[&lt;/span&gt;--enable_text_splitting&lt;span class=&#34;o&#34;&gt;]&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;              &lt;span class=&#34;o&#34;&gt;[&lt;/span&gt;--text_temp TEXT_TEMP&lt;span class=&#34;o&#34;&gt;]&lt;/span&gt; &lt;span class=&#34;o&#34;&gt;[&lt;/span&gt;--waveform_temp WAVEFORM_TEMP&lt;span class=&#34;o&#34;&gt;]&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;              &lt;span class=&#34;o&#34;&gt;[&lt;/span&gt;--output_dir OUTPUT_DIR&lt;span class=&#34;o&#34;&gt;]&lt;/span&gt; &lt;span class=&#34;o&#34;&gt;[&lt;/span&gt;--version&lt;span class=&#34;o&#34;&gt;]&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;Convert eBooks to Audiobooks using a Text-to-Speech model. You can either launch the Gradio interface or run the script in headless mode &lt;span class=&#34;k&#34;&gt;for&lt;/span&gt; direct conversion.
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;options:
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  -h, --help            show this &lt;span class=&#34;nb&#34;&gt;help&lt;/span&gt; message and &lt;span class=&#34;nb&#34;&gt;exit&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  --session SESSION     Session to resume the conversion in &lt;span class=&#34;k&#34;&gt;case&lt;/span&gt; of interruption, crash, 
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;                            or reuse of custom models and custom cloning voices.
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;**** The following options are &lt;span class=&#34;k&#34;&gt;for&lt;/span&gt; all modes:
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  Optional
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;**** The following option are &lt;span class=&#34;k&#34;&gt;for&lt;/span&gt; gradio/gui mode only:
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  Optional
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  --share               Enable a public shareable Gradio link.
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;**** The following options are &lt;span class=&#34;k&#34;&gt;for&lt;/span&gt; --headless mode only:
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  --headless            Run the script in headless mode
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  --ebook EBOOK         Path to the ebook file &lt;span class=&#34;k&#34;&gt;for&lt;/span&gt; conversion. Cannot be used when --ebooks_dir is present.
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  --ebooks_dir EBOOKS_DIR
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;                        Relative or absolute path of the directory containing the files to convert. 
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;                            Cannot be used when --ebook is present.
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  --language LANGUAGE   Language of the e-book. Default language is &lt;span class=&#34;nb&#34;&gt;set&lt;/span&gt; 
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;                            in ./lib/lang.py set as default &lt;span class=&#34;k&#34;&gt;if&lt;/span&gt; not present. All compatible language codes are in ./lib/lang.py
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;optional parameters:
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  --voice VOICE         &lt;span class=&#34;o&#34;&gt;(&lt;/span&gt;Optional&lt;span class=&#34;o&#34;&gt;)&lt;/span&gt; Path to the voice cloning file &lt;span class=&#34;k&#34;&gt;for&lt;/span&gt; TTS engine. 
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;                            Uses the default voice &lt;span class=&#34;k&#34;&gt;if&lt;/span&gt; not present.
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  --device &lt;span class=&#34;o&#34;&gt;{&lt;/span&gt;cpu,gpu,mps&lt;span class=&#34;o&#34;&gt;}&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;                        &lt;span class=&#34;o&#34;&gt;(&lt;/span&gt;Optional&lt;span class=&#34;o&#34;&gt;)&lt;/span&gt; Processor unit &lt;span class=&#34;nb&#34;&gt;type&lt;/span&gt; &lt;span class=&#34;k&#34;&gt;for&lt;/span&gt; the conversion. 
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;                            Default is &lt;span class=&#34;nb&#34;&gt;set&lt;/span&gt; in ./lib/conf.py &lt;span class=&#34;k&#34;&gt;if&lt;/span&gt; not present. Fall back to CPU &lt;span class=&#34;k&#34;&gt;if&lt;/span&gt; GPU not available.
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  --tts_engine &lt;span class=&#34;o&#34;&gt;{&lt;/span&gt;XTTSv2,BARK,VITS,FAIRSEQ,TACOTRON2,YOURTTS,xtts,bark,vits,fairseq,tacotron,yourtts&lt;span class=&#34;o&#34;&gt;}&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;                        &lt;span class=&#34;o&#34;&gt;(&lt;/span&gt;Optional&lt;span class=&#34;o&#34;&gt;)&lt;/span&gt; Preferred TTS engine &lt;span class=&#34;o&#34;&gt;(&lt;/span&gt;available are: &lt;span class=&#34;o&#34;&gt;[&lt;/span&gt;&lt;span class=&#34;s1&#34;&gt;&amp;#39;XTTSv2&amp;#39;&lt;/span&gt;, &lt;span class=&#34;s1&#34;&gt;&amp;#39;BARK&amp;#39;&lt;/span&gt;, &lt;span class=&#34;s1&#34;&gt;&amp;#39;VITS&amp;#39;&lt;/span&gt;, &lt;span class=&#34;s1&#34;&gt;&amp;#39;FAIRSEQ&amp;#39;&lt;/span&gt;, &lt;span class=&#34;s1&#34;&gt;&amp;#39;TACOTRON2&amp;#39;&lt;/span&gt;, &lt;span class=&#34;s1&#34;&gt;&amp;#39;YOURTTS&amp;#39;&lt;/span&gt;, &lt;span class=&#34;s1&#34;&gt;&amp;#39;xtts&amp;#39;&lt;/span&gt;, &lt;span class=&#34;s1&#34;&gt;&amp;#39;bark&amp;#39;&lt;/span&gt;, &lt;span class=&#34;s1&#34;&gt;&amp;#39;vits&amp;#39;&lt;/span&gt;, &lt;span class=&#34;s1&#34;&gt;&amp;#39;fairseq&amp;#39;&lt;/span&gt;, &lt;span class=&#34;s1&#34;&gt;&amp;#39;tacotron&amp;#39;&lt;/span&gt;, &lt;span class=&#34;s1&#34;&gt;&amp;#39;yourtts&amp;#39;&lt;/span&gt;&lt;span class=&#34;o&#34;&gt;]&lt;/span&gt;.
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;                            Default depends on the selected language. The tts engine should be compatible with the chosen language
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  --custom_model CUSTOM_MODEL
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;                        &lt;span class=&#34;o&#34;&gt;(&lt;/span&gt;Optional&lt;span class=&#34;o&#34;&gt;)&lt;/span&gt; Path to the custom model zip file containing mandatory model files. 
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;                            Please refer to ./lib/models.py
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  --fine_tuned FINE_TUNED
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;                        &lt;span class=&#34;o&#34;&gt;(&lt;/span&gt;Optional&lt;span class=&#34;o&#34;&gt;)&lt;/span&gt; Fine tuned model path. Default is &lt;span class=&#34;nb&#34;&gt;builtin&lt;/span&gt; model.
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  --output_format OUTPUT_FORMAT
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;                        &lt;span class=&#34;o&#34;&gt;(&lt;/span&gt;Optional&lt;span class=&#34;o&#34;&gt;)&lt;/span&gt; Output audio format. Default is &lt;span class=&#34;nb&#34;&gt;set&lt;/span&gt; in ./lib/conf.py
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  --temperature TEMPERATURE
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;                        &lt;span class=&#34;o&#34;&gt;(&lt;/span&gt;xtts only, optional&lt;span class=&#34;o&#34;&gt;)&lt;/span&gt; Temperature &lt;span class=&#34;k&#34;&gt;for&lt;/span&gt; the model. 
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;                            Default to config.json model. Higher temperatures lead to more creative outputs.
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  --length_penalty LENGTH_PENALTY
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;                        &lt;span class=&#34;o&#34;&gt;(&lt;/span&gt;xtts only, optional&lt;span class=&#34;o&#34;&gt;)&lt;/span&gt; A length penalty applied to the autoregressive decoder. 
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;                            Default to config.json model. Not applied to custom models.
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  --num_beams NUM_BEAMS
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;                        &lt;span class=&#34;o&#34;&gt;(&lt;/span&gt;xtts only, optional&lt;span class=&#34;o&#34;&gt;)&lt;/span&gt; Controls how many alternative sequences the model explores. Must be equal or greater than length penalty. 
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;                            Default to config.json model.
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  --repetition_penalty REPETITION_PENALTY
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;                        &lt;span class=&#34;o&#34;&gt;(&lt;/span&gt;xtts only, optional&lt;span class=&#34;o&#34;&gt;)&lt;/span&gt; A penalty that prevents the autoregressive decoder from repeating itself. 
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;                            Default to config.json model.
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  --top_k TOP_K         &lt;span class=&#34;o&#34;&gt;(&lt;/span&gt;xtts only, optional&lt;span class=&#34;o&#34;&gt;)&lt;/span&gt; Top-k sampling. 
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;                            Lower values mean more likely outputs and increased audio generation speed. 
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;                            Default to config.json model.
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  --top_p TOP_P         &lt;span class=&#34;o&#34;&gt;(&lt;/span&gt;xtts only, optional&lt;span class=&#34;o&#34;&gt;)&lt;/span&gt; Top-p sampling. 
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;                            Lower values mean more likely outputs and increased audio generation speed. Default to config.json model.
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  --speed SPEED         &lt;span class=&#34;o&#34;&gt;(&lt;/span&gt;xtts only, optional&lt;span class=&#34;o&#34;&gt;)&lt;/span&gt; Speed factor &lt;span class=&#34;k&#34;&gt;for&lt;/span&gt; the speech generation. 
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;                            Default to config.json model.
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  --enable_text_splitting
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;                        &lt;span class=&#34;o&#34;&gt;(&lt;/span&gt;xtts only, optional&lt;span class=&#34;o&#34;&gt;)&lt;/span&gt; Enable TTS text splitting. This option is known to not be very efficient. 
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;                            Default to config.json model.
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  --text_temp TEXT_TEMP
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;                        &lt;span class=&#34;o&#34;&gt;(&lt;/span&gt;bark only, optional&lt;span class=&#34;o&#34;&gt;)&lt;/span&gt; Text Temperature &lt;span class=&#34;k&#34;&gt;for&lt;/span&gt; the model. 
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;                            Default to 0.85. Higher temperatures lead to more creative outputs.
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  --waveform_temp WAVEFORM_TEMP
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;                        &lt;span class=&#34;o&#34;&gt;(&lt;/span&gt;bark only, optional&lt;span class=&#34;o&#34;&gt;)&lt;/span&gt; Waveform Temperature &lt;span class=&#34;k&#34;&gt;for&lt;/span&gt; the model. 
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;                            Default to 0.5. Higher temperatures lead to more creative outputs.
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  --output_dir OUTPUT_DIR
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;                        &lt;span class=&#34;o&#34;&gt;(&lt;/span&gt;Optional&lt;span class=&#34;o&#34;&gt;)&lt;/span&gt; Path to the output directory. Default is &lt;span class=&#34;nb&#34;&gt;set&lt;/span&gt; in ./lib/conf.py
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  --version             Show the version of the script and &lt;span class=&#34;nb&#34;&gt;exit&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;Example usage:    
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;Windows:
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;    Gradio/GUI:
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;    ebook2audiobook.cmd
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;    Headless mode:
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;    ebook2audiobook.cmd --headless --ebook &lt;span class=&#34;s1&#34;&gt;&amp;#39;/path/to/file&amp;#39;&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;Linux/Mac:
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;    Gradio/GUI:
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;    ./ebook2audiobook.sh
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;    Headless mode:
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;    ./ebook2audiobook.sh --headless --ebook &lt;span class=&#34;s1&#34;&gt;&amp;#39;/path/to/file&amp;#39;&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;    
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;Tip: to add silence &lt;span class=&#34;o&#34;&gt;(&lt;/span&gt;1.4 seconds&lt;span class=&#34;o&#34;&gt;)&lt;/span&gt; into your text just use &lt;span class=&#34;s2&#34;&gt;&amp;#34;###&amp;#34;&lt;/span&gt; or &lt;span class=&#34;s2&#34;&gt;&amp;#34;[pause]&amp;#34;&lt;/span&gt;.
&lt;/span&gt;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;
&lt;/div&gt;
&lt;/div&gt;&lt;p&gt;NOTE: in gradio/gui mode, to cancel a running conversion, just click on the [X] from the ebook upload component.&lt;/p&gt;
&lt;p&gt;TIP: if it needs some more pauses, just add &amp;lsquo;###&amp;rsquo; or &amp;lsquo;[pause]&amp;rsquo; between the words where you wish more pause. One [pause] equals 1.4 seconds&lt;/p&gt;
&lt;h4 id=&#34;docker-gpu-options&#34;&gt;Docker GPU Options
&lt;/h4&gt;&lt;p&gt;Available pre-built tags: &lt;code&gt;latest&lt;/code&gt; (CUDA 11.8)&lt;/p&gt;
&lt;h4 id=&#34;edit-if-gpu-isnt-detected-then-youll-have-to-build-the-image---building-the-docker-container&#34;&gt;Edit: IF GPU isn&amp;rsquo;t detected then you&amp;rsquo;ll have to build the image -&amp;gt; &lt;a class=&#34;link&#34; href=&#34;#building-the-docker-container&#34; &gt;Building the Docker Container&lt;/a&gt;
&lt;/h4&gt;&lt;h4 id=&#34;running-the-pre-built-docker-container&#34;&gt;Running the pre-built Docker Container
&lt;/h4&gt;&lt;p&gt;-Run with CPU only&lt;/p&gt;
&lt;div class=&#34;highlight&#34;&gt;&lt;div class=&#34;chroma&#34;&gt;
&lt;table class=&#34;lntable&#34;&gt;&lt;tr&gt;&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code&gt;&lt;span class=&#34;lnt&#34;&gt;1
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;
&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code class=&#34;language-powershell&#34; data-lang=&#34;powershell&#34;&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;&lt;span class=&#34;n&#34;&gt;docker&lt;/span&gt; &lt;span class=&#34;n&#34;&gt;run&lt;/span&gt; &lt;span class=&#34;p&#34;&gt;-&lt;/span&gt;&lt;span class=&#34;n&#34;&gt;-pull&lt;/span&gt; &lt;span class=&#34;n&#34;&gt;always&lt;/span&gt; &lt;span class=&#34;p&#34;&gt;-&lt;/span&gt;&lt;span class=&#34;n&#34;&gt;-rm&lt;/span&gt; &lt;span class=&#34;n&#34;&gt;-p&lt;/span&gt; &lt;span class=&#34;mf&#34;&gt;7860&lt;/span&gt;&lt;span class=&#34;err&#34;&gt;:&lt;/span&gt;&lt;span class=&#34;mf&#34;&gt;7860&lt;/span&gt; &lt;span class=&#34;n&#34;&gt;athomasson2&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;/&lt;/span&gt;&lt;span class=&#34;n&#34;&gt;ebook2audiobook&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;
&lt;/div&gt;
&lt;/div&gt;&lt;p&gt;-Run with GPU Speedup (NVIDIA compatible only)&lt;/p&gt;
&lt;div class=&#34;highlight&#34;&gt;&lt;div class=&#34;chroma&#34;&gt;
&lt;table class=&#34;lntable&#34;&gt;&lt;tr&gt;&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code&gt;&lt;span class=&#34;lnt&#34;&gt;1
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;
&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code class=&#34;language-powershell&#34; data-lang=&#34;powershell&#34;&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;&lt;span class=&#34;n&#34;&gt;docker&lt;/span&gt; &lt;span class=&#34;n&#34;&gt;run&lt;/span&gt; &lt;span class=&#34;p&#34;&gt;-&lt;/span&gt;&lt;span class=&#34;n&#34;&gt;-pull&lt;/span&gt; &lt;span class=&#34;n&#34;&gt;always&lt;/span&gt; &lt;span class=&#34;p&#34;&gt;-&lt;/span&gt;&lt;span class=&#34;n&#34;&gt;-rm&lt;/span&gt; &lt;span class=&#34;p&#34;&gt;-&lt;/span&gt;&lt;span class=&#34;n&#34;&gt;-gpus&lt;/span&gt; &lt;span class=&#34;n&#34;&gt;all&lt;/span&gt; &lt;span class=&#34;n&#34;&gt;-p&lt;/span&gt; &lt;span class=&#34;mf&#34;&gt;7860&lt;/span&gt;&lt;span class=&#34;err&#34;&gt;:&lt;/span&gt;&lt;span class=&#34;mf&#34;&gt;7860&lt;/span&gt; &lt;span class=&#34;n&#34;&gt;athomasson2&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;/&lt;/span&gt;&lt;span class=&#34;n&#34;&gt;ebook2audiobook&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;
&lt;/div&gt;
&lt;/div&gt;&lt;p&gt;This command will start the Gradio interface on port 7860 (localhost:7860).&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;For more options add the parameter &lt;code&gt;--help&lt;/code&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;h4 id=&#34;building-the-docker-container&#34;&gt;Building the Docker Container
&lt;/h4&gt;&lt;ul&gt;
&lt;li&gt;You can build the docker image with the command:&lt;/li&gt;
&lt;/ul&gt;
&lt;div class=&#34;highlight&#34;&gt;&lt;div class=&#34;chroma&#34;&gt;
&lt;table class=&#34;lntable&#34;&gt;&lt;tr&gt;&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code&gt;&lt;span class=&#34;lnt&#34;&gt;1
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;
&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code class=&#34;language-powershell&#34; data-lang=&#34;powershell&#34;&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;&lt;span class=&#34;n&#34;&gt;docker&lt;/span&gt; &lt;span class=&#34;n&#34;&gt;build&lt;/span&gt; &lt;span class=&#34;n&#34;&gt;-t&lt;/span&gt; &lt;span class=&#34;n&#34;&gt;athomasson2&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;/&lt;/span&gt;&lt;span class=&#34;n&#34;&gt;ebook2audiobook&lt;/span&gt; &lt;span class=&#34;p&#34;&gt;.&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;
&lt;/div&gt;
&lt;/div&gt;&lt;h4 id=&#34;avalible-docker-build-arguments&#34;&gt;Available Docker Build Arguments
&lt;/h4&gt;&lt;p&gt;&lt;code&gt;--build-arg TORCH_VERSION=cuda118&lt;/code&gt; Available tags: [cuda121, cuda118, cuda128, rocm, xpu, cpu]&lt;/p&gt;
&lt;p&gt;All CUDA version numbers should work, Ex: CUDA 11.6-&amp;gt; cuda116&lt;/p&gt;
&lt;p&gt;&lt;code&gt;--build-arg SKIP_XTTS_TEST=true&lt;/code&gt; (Saves space by not baking XTTSv2 model into docker image)&lt;/p&gt;
&lt;h2 id=&#34;docker-container-file-locations&#34;&gt;Docker container file locations
&lt;/h2&gt;&lt;p&gt;All ebook2audiobooks will have the base dir of &lt;code&gt;/app/&lt;/code&gt;
For example:
&lt;code&gt;tmp&lt;/code&gt; = &lt;code&gt;/app/tmp&lt;/code&gt;
&lt;code&gt;audiobooks&lt;/code&gt; = &lt;code&gt;/app/audiobooks&lt;/code&gt;&lt;/p&gt;
&lt;h2 id=&#34;docker-headless-guide&#34;&gt;Docker headless guide
&lt;/h2&gt;&lt;ul&gt;
&lt;li&gt;Before you do run this you need to create a dir named &amp;ldquo;input-folder&amp;rdquo; in your current dir
which will be linked, This is where you can put your input files for the docker image to see&lt;/li&gt;
&lt;/ul&gt;
&lt;div class=&#34;highlight&#34;&gt;&lt;div class=&#34;chroma&#34;&gt;
&lt;table class=&#34;lntable&#34;&gt;&lt;tr&gt;&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code&gt;&lt;span class=&#34;lnt&#34;&gt;1
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;
&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code class=&#34;language-bash&#34; data-lang=&#34;bash&#34;&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;mkdir input-folder &lt;span class=&#34;o&#34;&gt;&amp;amp;&amp;amp;&lt;/span&gt; mkdir Audiobooks
&lt;/span&gt;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;
&lt;/div&gt;
&lt;/div&gt;&lt;ul&gt;
&lt;li&gt;In the command below swap out &lt;strong&gt;YOUR_INPUT_FILE.TXT&lt;/strong&gt; with the name of your input file&lt;/li&gt;
&lt;/ul&gt;
&lt;div class=&#34;highlight&#34;&gt;&lt;div class=&#34;chroma&#34;&gt;
&lt;table class=&#34;lntable&#34;&gt;&lt;tr&gt;&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code&gt;&lt;span class=&#34;lnt&#34;&gt;1
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;2
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;3
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;4
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;5
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;
&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code class=&#34;language-bash&#34; data-lang=&#34;bash&#34;&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;docker run --pull always --rm &lt;span class=&#34;se&#34;&gt;\
&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;    -v &lt;span class=&#34;k&#34;&gt;$(&lt;/span&gt;&lt;span class=&#34;nb&#34;&gt;pwd&lt;/span&gt;&lt;span class=&#34;k&#34;&gt;)&lt;/span&gt;/input-folder:/app/input_folder &lt;span class=&#34;se&#34;&gt;\
&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;    -v &lt;span class=&#34;k&#34;&gt;$(&lt;/span&gt;&lt;span class=&#34;nb&#34;&gt;pwd&lt;/span&gt;&lt;span class=&#34;k&#34;&gt;)&lt;/span&gt;/audiobooks:/app/audiobooks &lt;span class=&#34;se&#34;&gt;\
&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;    athomasson2/ebook2audiobook &lt;span class=&#34;se&#34;&gt;\
&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;    --headless --ebook /input_folder/YOUR_EBOOK_FILE
&lt;/span&gt;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;
&lt;/div&gt;
&lt;/div&gt;&lt;ul&gt;
&lt;li&gt;The output Audiobooks will be found in the Audiobook folder which will also be located
in your local dir you ran this docker command in&lt;/li&gt;
&lt;/ul&gt;
&lt;h2 id=&#34;to-get-the-help-command-for-the-other-parameters-this-program-has-you-can-run-this&#34;&gt;To get the help command for the other parameters this program has you can run this
&lt;/h2&gt;&lt;div class=&#34;highlight&#34;&gt;&lt;div class=&#34;chroma&#34;&gt;
&lt;table class=&#34;lntable&#34;&gt;&lt;tr&gt;&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code&gt;&lt;span class=&#34;lnt&#34;&gt;1
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;
&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code class=&#34;language-bash&#34; data-lang=&#34;bash&#34;&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;docker run --pull always --rm athomasson2/ebook2audiobook --help
&lt;/span&gt;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;
&lt;/div&gt;
&lt;/div&gt;&lt;p&gt;That will output this
&lt;a class=&#34;link&#34; href=&#34;#help-command-output&#34; &gt;Help command output&lt;/a&gt;&lt;/p&gt;
&lt;h3 id=&#34;docker-compose&#34;&gt;Docker Compose
&lt;/h3&gt;&lt;p&gt;This project uses Docker Compose to run locally. You can enable or disable GPU support
by setting either &lt;code&gt;*gpu-enabled&lt;/code&gt; or &lt;code&gt;*gpu-disabled&lt;/code&gt; in &lt;code&gt;docker-compose.yml&lt;/code&gt;&lt;/p&gt;
&lt;h4 id=&#34;steps-to-run&#34;&gt;Steps to Run
&lt;/h4&gt;&lt;ol&gt;
&lt;li&gt;&lt;strong&gt;Clone the Repository&lt;/strong&gt; (if you haven&amp;rsquo;t already):
&lt;div class=&#34;highlight&#34;&gt;&lt;div class=&#34;chroma&#34;&gt;
&lt;table class=&#34;lntable&#34;&gt;&lt;tr&gt;&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code&gt;&lt;span class=&#34;lnt&#34;&gt;1
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;2
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;
&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code class=&#34;language-bash&#34; data-lang=&#34;bash&#34;&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;git clone https://github.com/DrewThomasson/ebook2audiobook.git
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;&lt;span class=&#34;nb&#34;&gt;cd&lt;/span&gt; ebook2audiobook
&lt;/span&gt;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;
&lt;/div&gt;
&lt;/div&gt;&lt;/li&gt;
&lt;li&gt;&lt;strong&gt;Set GPU Support (disabled by default)&lt;/strong&gt;
To enable GPU support, modify &lt;code&gt;docker-compose.yml&lt;/code&gt; and change &lt;code&gt;*gpu-disabled&lt;/code&gt; to &lt;code&gt;*gpu-enabled&lt;/code&gt;&lt;/li&gt;
&lt;li&gt;&lt;strong&gt;Start the service:&lt;/strong&gt;
&lt;div class=&#34;highlight&#34;&gt;&lt;div class=&#34;chroma&#34;&gt;
&lt;table class=&#34;lntable&#34;&gt;&lt;tr&gt;&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code&gt;&lt;span class=&#34;lnt&#34;&gt;1
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;2
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;3
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;4
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;5
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;
&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code class=&#34;language-bash&#34; data-lang=&#34;bash&#34;&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;&lt;span class=&#34;c1&#34;&gt;# Docker&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;docker-compose up -d &lt;span class=&#34;c1&#34;&gt;# To update add --build&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;&lt;span class=&#34;c1&#34;&gt;# Podman&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;podman compose -f podman-compose.yml up -d &lt;span class=&#34;c1&#34;&gt;# To update add --build&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;
&lt;/div&gt;
&lt;/div&gt;&lt;/li&gt;
&lt;li&gt;&lt;strong&gt;Access the service:&lt;/strong&gt;
The service will be available at http://localhost:7860.&lt;/li&gt;
&lt;/ol&gt;
&lt;h2 id=&#34;common-docker-issues&#34;&gt;Common Docker Issues
&lt;/h2&gt;&lt;ul&gt;
&lt;li&gt;
&lt;p&gt;My NVIDIA GPU isn&amp;rsquo;t being detected?? -&amp;gt; &lt;a class=&#34;link&#34; href=&#34;https://github.com/DrewThomasson/ebook2audiobook/wiki/GPU-ISSUES&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;GPU ISSUES Wiki Page&lt;/a&gt;&lt;/p&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;&lt;code&gt;python: can&#39;t open file &#39;/home/user/app/app.py&#39;: [Errno 2] No such file or directory&lt;/code&gt; (Just remove all post arguments as I replaced the &lt;code&gt;CMD&lt;/code&gt; with &lt;code&gt;ENTRYPOINT&lt;/code&gt; in the &lt;a class=&#34;link&#34; href=&#34;Dockerfile&#34; &gt;Dockerfile&lt;/a&gt;)&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;Example: &lt;code&gt;docker run --pull always athomasson2/ebook2audiobook app.py --script_mode full_docker&lt;/code&gt; - &amp;gt; corrected - &amp;gt; &lt;code&gt;docker run --pull always athomasson2/ebook2audiobook&lt;/code&gt;&lt;/li&gt;
&lt;li&gt;Arguments can be easily added like this now &lt;code&gt;docker run --pull always athomasson2/ebook2audiobook --share&lt;/code&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;Docker gets stuck downloading Fine-Tuned models.
(This does not happen for every computer but some appear to run into this issue)
Disabling the progress bar appears to fix the issue,
as discussed &lt;a class=&#34;link&#34; href=&#34;https://github.com/DrewThomasson/ebook2audiobook/issues/191&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;here in #191&lt;/a&gt;
Example of adding this fix in the &lt;code&gt;docker run&lt;/code&gt; command&lt;/p&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;div class=&#34;highlight&#34;&gt;&lt;div class=&#34;chroma&#34;&gt;
&lt;table class=&#34;lntable&#34;&gt;&lt;tr&gt;&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code&gt;&lt;span class=&#34;lnt&#34;&gt;1
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;2
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;
&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code class=&#34;language-Dockerfile&#34; data-lang=&#34;Dockerfile&#34;&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;docker run --pull always --rm --gpus all -e &lt;span class=&#34;nv&#34;&gt;HF_HUB_DISABLE_PROGRESS_BARS&lt;/span&gt;&lt;span class=&#34;o&#34;&gt;=&lt;/span&gt;&lt;span class=&#34;m&#34;&gt;1&lt;/span&gt; -e &lt;span class=&#34;nv&#34;&gt;HF_HUB_ENABLE_HF_TRANSFER&lt;/span&gt;&lt;span class=&#34;o&#34;&gt;=&lt;/span&gt;&lt;span class=&#34;m&#34;&gt;0&lt;/span&gt; &lt;span class=&#34;se&#34;&gt;\
&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;    -p 7860:7860 athomasson2/ebook2audiobook&lt;span class=&#34;err&#34;&gt;
&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;
&lt;/div&gt;
&lt;/div&gt;&lt;h2 id=&#34;fine-tuned-tts-models&#34;&gt;Fine Tuned TTS models
&lt;/h2&gt;&lt;h4 id=&#34;fine-tune-your-own-xttsv2-model&#34;&gt;Fine Tune your own XTTSv2 model
&lt;/h4&gt;&lt;p&gt;&lt;a class=&#34;link&#34; href=&#34;https://huggingface.co/spaces/drewThomasson/xtts-finetune-webui-gpu&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;&lt;img src=&#34;https://img.shields.io/badge/Hugging%20Face-Spaces-yellow?style=flat&amp;amp;logo=huggingface&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;Hugging Face&#34;
	
	
&gt;&lt;/a&gt; &lt;a class=&#34;link&#34; href=&#34;https://github.com/DrewThomasson/ebook2audiobook/blob/v25/Notebooks/finetune/xtts/kaggle-xtts-finetune-webui-gradio-gui.ipynb&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;&lt;img src=&#34;https://img.shields.io/badge/Kaggle-035a7d?style=flat&amp;amp;logo=kaggle&amp;amp;logoColor=white&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;Kaggle&#34;
	
	
&gt;&lt;/a&gt; &lt;a class=&#34;link&#34; href=&#34;https://colab.research.google.com/github/DrewThomasson/ebook2audiobook/blob/v25/Notebooks/finetune/xtts/colab_xtts_finetune_webui.ipynb&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;&lt;img src=&#34;https://colab.research.google.com/assets/colab-badge.svg&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;Open In Colab&#34;
	
	
&gt;&lt;/a&gt;&lt;/p&gt;
&lt;h4 id=&#34;de-noise-training-data&#34;&gt;De-noise training data
&lt;/h4&gt;&lt;p&gt;&lt;a class=&#34;link&#34; href=&#34;https://huggingface.co/spaces/drewThomasson/DeepFilterNet2_no_limit&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;&lt;img src=&#34;https://img.shields.io/badge/Hugging%20Face-Spaces-yellow?style=flat&amp;amp;logo=huggingface&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;Hugging Face&#34;
	
	
&gt;&lt;/a&gt; &lt;a class=&#34;link&#34; href=&#34;https://github.com/Rikorose/DeepFilterNet&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;&lt;img src=&#34;https://img.shields.io/badge/DeepFilterNet-181717?logo=github&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;GitHub Repo&#34;
	
	
&gt;&lt;/a&gt;&lt;/p&gt;
&lt;h3 id=&#34;fine-tuned-tts-collection&#34;&gt;Fine Tuned TTS Collection
&lt;/h3&gt;&lt;p&gt;&lt;a class=&#34;link&#34; href=&#34;https://huggingface.co/drewThomasson/fineTunedTTSModels/tree/main&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;&lt;img src=&#34;https://img.shields.io/badge/Hugging%20Face-Models-yellow?style=flat&amp;amp;logo=huggingface&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;Hugging Face&#34;
	
	
&gt;&lt;/a&gt;&lt;/p&gt;
&lt;p&gt;For an XTTSv2 custom model a ref audio clip of the voice reference is mandatory:&lt;/p&gt;
&lt;h2 id=&#34;supported-ebook-formats&#34;&gt;Supported eBook Formats
&lt;/h2&gt;&lt;ul&gt;
&lt;li&gt;&lt;code&gt;.epub&lt;/code&gt;, &lt;code&gt;.pdf&lt;/code&gt;, &lt;code&gt;.mobi&lt;/code&gt;, &lt;code&gt;.txt&lt;/code&gt;, &lt;code&gt;.html&lt;/code&gt;, &lt;code&gt;.rtf&lt;/code&gt;, &lt;code&gt;.chm&lt;/code&gt;, &lt;code&gt;.lit&lt;/code&gt;,
&lt;code&gt;.pdb&lt;/code&gt;, &lt;code&gt;.fb2&lt;/code&gt;, &lt;code&gt;.odt&lt;/code&gt;, &lt;code&gt;.cbr&lt;/code&gt;, &lt;code&gt;.cbz&lt;/code&gt;, &lt;code&gt;.prc&lt;/code&gt;, &lt;code&gt;.lrf&lt;/code&gt;, &lt;code&gt;.pml&lt;/code&gt;,
&lt;code&gt;.snb&lt;/code&gt;, &lt;code&gt;.cbc&lt;/code&gt;, &lt;code&gt;.rb&lt;/code&gt;, &lt;code&gt;.tcr&lt;/code&gt;&lt;/li&gt;
&lt;li&gt;&lt;strong&gt;Best results&lt;/strong&gt;: &lt;code&gt;.epub&lt;/code&gt; or &lt;code&gt;.mobi&lt;/code&gt; for automatic chapter detection&lt;/li&gt;
&lt;/ul&gt;
&lt;h2 id=&#34;output-formats&#34;&gt;Output Formats
&lt;/h2&gt;&lt;ul&gt;
&lt;li&gt;Creates a &lt;code&gt;[&#39;m4b&#39;, &#39;m4a&#39;, &#39;mp4&#39;, &#39;webm&#39;, &#39;mov&#39;, &#39;mp3&#39;, &#39;flac&#39;, &#39;wav&#39;, &#39;ogg&#39;, &#39;aac&#39;]&lt;/code&gt; (set in ./lib/conf.py) file with metadata and chapters.&lt;/li&gt;
&lt;/ul&gt;
&lt;h2 id=&#34;updating-to-latest-version&#34;&gt;Updating to Latest Version
&lt;/h2&gt;&lt;div class=&#34;highlight&#34;&gt;&lt;div class=&#34;chroma&#34;&gt;
&lt;table class=&#34;lntable&#34;&gt;&lt;tr&gt;&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code&gt;&lt;span class=&#34;lnt&#34;&gt;1
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;2
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;3
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;
&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code class=&#34;language-bash&#34; data-lang=&#34;bash&#34;&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;git pull &lt;span class=&#34;c1&#34;&gt;# Locally/Compose&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;docker pull athomasson2/ebook2audiobook:latest &lt;span class=&#34;c1&#34;&gt;# For Pre-built Docker images&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;
&lt;/div&gt;
&lt;/div&gt;&lt;h2 id=&#34;reverting-to-older-versions&#34;&gt;Reverting to older Versions
&lt;/h2&gt;&lt;p&gt;Releases can be found -&amp;gt; &lt;a class=&#34;link&#34; href=&#34;https://github.com/DrewThomasson/ebook2audiobook/releases&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;here&lt;/a&gt;&lt;/p&gt;
&lt;div class=&#34;highlight&#34;&gt;&lt;div class=&#34;chroma&#34;&gt;
&lt;table class=&#34;lntable&#34;&gt;&lt;tr&gt;&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code&gt;&lt;span class=&#34;lnt&#34;&gt;1
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;2
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;3
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;
&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code class=&#34;language-bash&#34; data-lang=&#34;bash&#34;&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;git checkout tags/VERSION_NUM &lt;span class=&#34;c1&#34;&gt;# Locally/Compose -&amp;gt; Example: git checkout tags/v25.7.7&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;athomasson2/ebook2audiobook:VERSION_NUM &lt;span class=&#34;c1&#34;&gt;# For Pre-built Docker images -&amp;gt; Example: athomasson2/ebook2audiobook:v25.7.7&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;
&lt;/div&gt;
&lt;/div&gt;&lt;h2 id=&#34;common-issues&#34;&gt;Common Issues:
&lt;/h2&gt;&lt;ul&gt;
&lt;li&gt;My NVIDIA GPU isn&amp;rsquo;t being detected? -&amp;gt; &lt;a class=&#34;link&#34; href=&#34;https://github.com/DrewThomasson/ebook2audiobook/wiki/GPU-ISSUES&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;GPU ISSUES Wiki Page&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;CPU is slow (better on server smp CPU) while NVIDIA GPU can have almost real time conversion.
&lt;a class=&#34;link&#34; href=&#34;https://github.com/DrewThomasson/ebook2audiobook/discussions/19#discussioncomment-10879846&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;Discussion about this&lt;/a&gt;
For faster multilingual generation I would suggest my other
&lt;a class=&#34;link&#34; href=&#34;https://github.com/DrewThomasson/ebook2audiobookpiper-tts&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;project that uses piper-tts&lt;/a&gt; instead
(It doesn&amp;rsquo;t have zero-shot voice cloning though, and is Siri quality voices, but it is much faster on cpu).&lt;/li&gt;
&lt;li&gt;&amp;ldquo;I&amp;rsquo;m having dependency issues&amp;rdquo; - Just use the Docker image, it&amp;rsquo;s fully self-contained and has a headless mode,
add &lt;code&gt;--help&lt;/code&gt; parameter at the end of the docker run command for more information.&lt;/li&gt;
&lt;li&gt;&amp;ldquo;I&amp;rsquo;m getting a truncated audio issue!&amp;rdquo; - PLEASE MAKE AN ISSUE OF THIS,
we don&amp;rsquo;t speak every language and need advice from users to fine-tune the sentence splitting logic.😊&lt;/li&gt;
&lt;/ul&gt;
&lt;h2 id=&#34;what-we-need-help-with-&#34;&gt;What we need help with! 🙌
&lt;/h2&gt;&lt;h2 id=&#34;full-list-of-things-can-be-found-here&#34;&gt;&lt;a class=&#34;link&#34; href=&#34;https://github.com/DrewThomasson/ebook2audiobook/issues/32&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;Full list of things can be found here&lt;/a&gt;
&lt;/h2&gt;&lt;ul&gt;
&lt;li&gt;Any help from people speaking any of the supported languages to help us improve the models&lt;/li&gt;
&lt;/ul&gt;
&lt;h2 id=&#34;do-you-need-to-rent-a-gpu-to-boost-service-from-us&#34;&gt;Do you need to rent a GPU to boost service from us?
&lt;/h2&gt;&lt;ul&gt;
&lt;li&gt;A poll is open here &lt;a class=&#34;link&#34; href=&#34;https://github.com/DrewThomasson/ebook2audiobook/discussions/889&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;https://github.com/DrewThomasson/ebook2audiobook/discussions/889&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;h2 id=&#34;special-thanks&#34;&gt;Special Thanks
&lt;/h2&gt;&lt;ul&gt;
&lt;li&gt;&lt;strong&gt;Coqui TTS&lt;/strong&gt;: &lt;a class=&#34;link&#34; href=&#34;https://github.com/idiap/coqui-ai-TTS&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;Coqui TTS GitHub&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;strong&gt;Calibre&lt;/strong&gt;: &lt;a class=&#34;link&#34; href=&#34;https://calibre-ebook.com&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;Calibre Website&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;strong&gt;FFmpeg&lt;/strong&gt;: &lt;a class=&#34;link&#34; href=&#34;https://ffmpeg.org&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;FFmpeg Website&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;https://github.com/DrewThomasson/ebook2audiobook/issues/8&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;@shakenbake15 for better chapter saving method&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
</description>
        </item>
        <item>
        <title>Product Hunt Daily | 2025-10-21</title>
        <link>https://producthunt.programnotes.cn/en/p/product-hunt-daily-2025-10-21/</link>
        <pubDate>Tue, 21 Oct 2025 07:30:35 +0000</pubDate>
        
        <guid>https://producthunt.programnotes.cn/en/p/product-hunt-daily-2025-10-21/</guid>
        <description>&lt;img src="https://ph-files.imgix.net/3092ad8c-69f9-4198-b0c6-4e148cd1bb66.png?auto=format" alt="Featured image of post Product Hunt Daily | 2025-10-21" /&gt;&lt;h2 id=&#34;1-fish-audio-s1&#34;&gt;1. Fish Audio S1
&lt;/h2&gt;&lt;p&gt;&lt;strong&gt;Tagline&lt;/strong&gt;: Expressive Voice Cloning and Text-to-Speech&lt;br&gt;
&lt;strong&gt;Description&lt;/strong&gt;: Fish Audio S1 is the most expressive and emotionally rich TTS model—creating lifelike voices that capture emotion, rhythm, and nuance. Clone any voice in 10 seconds, preserving accent, tone, and speaking habits with unmatched realism.&lt;br&gt;
&lt;strong&gt;Website&lt;/strong&gt;: &lt;a class=&#34;link&#34; href=&#34;https://www.producthunt.com/r/RXO5YOK7ZBZYFG?utm_campaign=producthunt-api&amp;amp;utm_medium=api-v2&amp;amp;utm_source=Application%3A&amp;#43;weekly&amp;#43;%28ID%3A&amp;#43;148189%29&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;open&lt;/a&gt;&lt;br&gt;
&lt;strong&gt;Product Hunt&lt;/strong&gt;: &lt;a class=&#34;link&#34; href=&#34;https://www.producthunt.com/products/fish-speech?utm_campaign=producthunt-api&amp;amp;utm_medium=api-v2&amp;amp;utm_source=Application%3A&amp;#43;weekly&amp;#43;%28ID%3A&amp;#43;148189%29&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;View on Product Hunt&lt;/a&gt;&lt;br&gt;
&lt;img src=&#34;https://ph-files.imgix.net/3092ad8c-69f9-4198-b0c6-4e148cd1bb66.png?auto=format&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;Fish Audio S1&#34;
	
	
&gt;&lt;br&gt;
&lt;strong&gt;Keyword&lt;/strong&gt;: Voice cloning, text-to-speech, TTS, expressive, lifelike voices, emotion, rhythm, nuance, voice cloning, accent, tone, realism, Fish Audio S1&lt;br&gt;
&lt;strong&gt;VotesCount&lt;/strong&gt;: 🔺413&lt;br&gt;
&lt;strong&gt;Featured&lt;/strong&gt;: Yes&lt;br&gt;
&lt;strong&gt;CreatedAt&lt;/strong&gt;: 2025-10-20 07:01 AM (UTC)&lt;/p&gt;
&lt;hr&gt;
&lt;h2 id=&#34;2-replymer&#34;&gt;2. Replymer
&lt;/h2&gt;&lt;p&gt;&lt;strong&gt;Tagline&lt;/strong&gt;: Human replies that sell your product&lt;br&gt;
&lt;strong&gt;Description&lt;/strong&gt;: Replymer helps your brand grow through authentic, human‑written replies that recommend your product in the right conversations.&lt;br&gt;
&lt;strong&gt;Website&lt;/strong&gt;: &lt;a class=&#34;link&#34; href=&#34;https://www.producthunt.com/r/ATCTFUFRUDRMHA?utm_campaign=producthunt-api&amp;amp;utm_medium=api-v2&amp;amp;utm_source=Application%3A&amp;#43;weekly&amp;#43;%28ID%3A&amp;#43;148189%29&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;open&lt;/a&gt;&lt;br&gt;
&lt;strong&gt;Product Hunt&lt;/strong&gt;: &lt;a class=&#34;link&#34; href=&#34;https://www.producthunt.com/products/replymer?utm_campaign=producthunt-api&amp;amp;utm_medium=api-v2&amp;amp;utm_source=Application%3A&amp;#43;weekly&amp;#43;%28ID%3A&amp;#43;148189%29&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;View on Product Hunt&lt;/a&gt;&lt;br&gt;
&lt;img src=&#34;https://ph-files.imgix.net/4f07fe8a-bb07-4ee8-8060-c848711686e8.png?auto=format&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;Replymer&#34;
	
	
&gt;&lt;br&gt;
&lt;strong&gt;Keyword&lt;/strong&gt;: Human replies, product recommendations, brand growth, authentic replies, social selling, conversation marketing&lt;br&gt;
&lt;strong&gt;VotesCount&lt;/strong&gt;: 🔺379&lt;br&gt;
&lt;strong&gt;Featured&lt;/strong&gt;: Yes&lt;br&gt;
&lt;strong&gt;CreatedAt&lt;/strong&gt;: 2025-10-20 07:01 AM (UTC)&lt;/p&gt;
&lt;hr&gt;
&lt;h2 id=&#34;3-logic-inc&#34;&gt;3. Logic, Inc.
&lt;/h2&gt;&lt;p&gt;&lt;strong&gt;Tagline&lt;/strong&gt;: Automate recurring decisions in plain English&lt;br&gt;
&lt;strong&gt;Description&lt;/strong&gt;: Logic automates recurring decisions and reviews. Write your process once in plain English, and automate it anywhere. From content moderation to invoice processing, Logic lets you deploy in minutes, not months.&lt;br&gt;
&lt;strong&gt;Website&lt;/strong&gt;: &lt;a class=&#34;link&#34; href=&#34;https://www.producthunt.com/r/LKORSMXKRP6577?utm_campaign=producthunt-api&amp;amp;utm_medium=api-v2&amp;amp;utm_source=Application%3A&amp;#43;weekly&amp;#43;%28ID%3A&amp;#43;148189%29&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;open&lt;/a&gt;&lt;br&gt;
&lt;strong&gt;Product Hunt&lt;/strong&gt;: &lt;a class=&#34;link&#34; href=&#34;https://www.producthunt.com/products/logic-effortless-operational-magic?utm_campaign=producthunt-api&amp;amp;utm_medium=api-v2&amp;amp;utm_source=Application%3A&amp;#43;weekly&amp;#43;%28ID%3A&amp;#43;148189%29&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;View on Product Hunt&lt;/a&gt;&lt;br&gt;
&lt;img src=&#34;https://ph-files.imgix.net/3b9c2e5e-f9f3-4746-8354-40d798608a71.png?auto=format&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;Logic, Inc.&#34;
	
	
&gt;&lt;br&gt;
&lt;strong&gt;Keyword&lt;/strong&gt;: Automation, Decisions, Plain English, Process Automation, No-Code, Content Moderation, Invoice Processing, Deploy Quickly&lt;br&gt;
&lt;strong&gt;VotesCount&lt;/strong&gt;: 🔺291&lt;br&gt;
&lt;strong&gt;Featured&lt;/strong&gt;: Yes&lt;br&gt;
&lt;strong&gt;CreatedAt&lt;/strong&gt;: 2025-10-20 07:01 AM (UTC)&lt;/p&gt;
&lt;hr&gt;
&lt;h2 id=&#34;4-voice-gecko&#34;&gt;4. Voice Gecko
&lt;/h2&gt;&lt;p&gt;&lt;strong&gt;Tagline&lt;/strong&gt;: Voice dictation at your fingertips—type less, say more.&lt;br&gt;
&lt;strong&gt;Description&lt;/strong&gt;: Instant dictation for desktop. Press a shortcut, speak, and instantly get accurate text on your clipboard—perfect for emails, coding, AI prompts, or brain dumps.&lt;br&gt;
&lt;strong&gt;Website&lt;/strong&gt;: &lt;a class=&#34;link&#34; href=&#34;https://www.producthunt.com/r/IN6NQQFFTBMSWU?utm_campaign=producthunt-api&amp;amp;utm_medium=api-v2&amp;amp;utm_source=Application%3A&amp;#43;weekly&amp;#43;%28ID%3A&amp;#43;148189%29&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;open&lt;/a&gt;&lt;br&gt;
&lt;strong&gt;Product Hunt&lt;/strong&gt;: &lt;a class=&#34;link&#34; href=&#34;https://www.producthunt.com/products/voice-gecko?utm_campaign=producthunt-api&amp;amp;utm_medium=api-v2&amp;amp;utm_source=Application%3A&amp;#43;weekly&amp;#43;%28ID%3A&amp;#43;148189%29&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;View on Product Hunt&lt;/a&gt;&lt;br&gt;
&lt;img src=&#34;https://ph-files.imgix.net/5b1a074d-221e-4952-aa01-ae53fb806e3e.jpeg?auto=format&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;Voice Gecko&#34;
	
	
&gt;&lt;br&gt;
&lt;strong&gt;Keyword&lt;/strong&gt;: voice dictation, dictation software, voice to text, speech to text, clipboard, desktop, productivity, typing, shortcut, AI prompts, brain dump, voice input&lt;br&gt;
&lt;strong&gt;VotesCount&lt;/strong&gt;: 🔺237&lt;br&gt;
&lt;strong&gt;Featured&lt;/strong&gt;: Yes&lt;br&gt;
&lt;strong&gt;CreatedAt&lt;/strong&gt;: 2025-10-20 07:01 AM (UTC)&lt;/p&gt;
&lt;hr&gt;
&lt;h2 id=&#34;5-simplora&#34;&gt;5. Simplora
&lt;/h2&gt;&lt;p&gt;&lt;strong&gt;Tagline&lt;/strong&gt;: Meetings that make you smarter, not confused&lt;br&gt;
&lt;strong&gt;Description&lt;/strong&gt;: Never feel lost in a meeting again! Simplora turns every conversation into a unique learning experience, in real-time and beyond. Available wherever you meet. No download required. Get started for free.&lt;br&gt;
&lt;strong&gt;Website&lt;/strong&gt;: &lt;a class=&#34;link&#34; href=&#34;https://www.producthunt.com/r/EGCNW2QYNQ52JB?utm_campaign=producthunt-api&amp;amp;utm_medium=api-v2&amp;amp;utm_source=Application%3A&amp;#43;weekly&amp;#43;%28ID%3A&amp;#43;148189%29&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;open&lt;/a&gt;&lt;br&gt;
&lt;strong&gt;Product Hunt&lt;/strong&gt;: &lt;a class=&#34;link&#34; href=&#34;https://www.producthunt.com/products/simplora?utm_campaign=producthunt-api&amp;amp;utm_medium=api-v2&amp;amp;utm_source=Application%3A&amp;#43;weekly&amp;#43;%28ID%3A&amp;#43;148189%29&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;View on Product Hunt&lt;/a&gt;&lt;br&gt;
&lt;img src=&#34;https://ph-files.imgix.net/8d6520ce-6029-468d-a074-d99967a9dccc.png?auto=format&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;Simplora&#34;
	
	
&gt;&lt;br&gt;
&lt;strong&gt;Keyword&lt;/strong&gt;: Meetings, learning, real-time, no download, free, smarter, confusion, conversation, Simplora&lt;br&gt;
&lt;strong&gt;VotesCount&lt;/strong&gt;: 🔺188&lt;br&gt;
&lt;strong&gt;Featured&lt;/strong&gt;: Yes&lt;br&gt;
&lt;strong&gt;CreatedAt&lt;/strong&gt;: 2025-10-20 07:01 AM (UTC)&lt;/p&gt;
&lt;hr&gt;
&lt;h2 id=&#34;6-diny&#34;&gt;6. diny
&lt;/h2&gt;&lt;p&gt;&lt;strong&gt;Tagline&lt;/strong&gt;: From git diff to clean commits&lt;br&gt;
&lt;strong&gt;Description&lt;/strong&gt;: diny automates commit messages from your staged changes. Clean, consistent, conventional. Includes a timeline view of past commits to keep your history crystal clear.&lt;br&gt;
&lt;strong&gt;Website&lt;/strong&gt;: &lt;a class=&#34;link&#34; href=&#34;https://www.producthunt.com/r/GCRJHKK2B3RWT5?utm_campaign=producthunt-api&amp;amp;utm_medium=api-v2&amp;amp;utm_source=Application%3A&amp;#43;weekly&amp;#43;%28ID%3A&amp;#43;148189%29&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;open&lt;/a&gt;&lt;br&gt;
&lt;strong&gt;Product Hunt&lt;/strong&gt;: &lt;a class=&#34;link&#34; href=&#34;https://www.producthunt.com/products/diny?utm_campaign=producthunt-api&amp;amp;utm_medium=api-v2&amp;amp;utm_source=Application%3A&amp;#43;weekly&amp;#43;%28ID%3A&amp;#43;148189%29&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;View on Product Hunt&lt;/a&gt;&lt;br&gt;
&lt;img src=&#34;https://ph-files.imgix.net/539d4587-2480-44fe-9e56-e972a86a8945.png?auto=format&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;diny&#34;
	
	
&gt;&lt;br&gt;
&lt;strong&gt;Keyword&lt;/strong&gt;: git commits, commit messages, automation, git diff, clean commits, conventional commits, commit history, timeline view, developer tools&lt;br&gt;
&lt;strong&gt;VotesCount&lt;/strong&gt;: 🔺156&lt;br&gt;
&lt;strong&gt;Featured&lt;/strong&gt;: Yes&lt;br&gt;
&lt;strong&gt;CreatedAt&lt;/strong&gt;: 2025-10-20 07:01 AM (UTC)&lt;/p&gt;
&lt;hr&gt;
&lt;h2 id=&#34;7-pylon&#34;&gt;7. Pylon
&lt;/h2&gt;&lt;p&gt;&lt;strong&gt;Tagline&lt;/strong&gt;: The support platform built for B2B&lt;br&gt;
&lt;strong&gt;Description&lt;/strong&gt;: AI-Native support platform built for B2B companies. One tool for your ticketing, chat, knowledge base, AI support, account intelligence, and more.&lt;br&gt;
&lt;strong&gt;Website&lt;/strong&gt;: &lt;a class=&#34;link&#34; href=&#34;https://www.producthunt.com/r/XWNWAI7CGNNFJB?utm_campaign=producthunt-api&amp;amp;utm_medium=api-v2&amp;amp;utm_source=Application%3A&amp;#43;weekly&amp;#43;%28ID%3A&amp;#43;148189%29&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;open&lt;/a&gt;&lt;br&gt;
&lt;strong&gt;Product Hunt&lt;/strong&gt;: &lt;a class=&#34;link&#34; href=&#34;https://www.producthunt.com/products/pylon-4?utm_campaign=producthunt-api&amp;amp;utm_medium=api-v2&amp;amp;utm_source=Application%3A&amp;#43;weekly&amp;#43;%28ID%3A&amp;#43;148189%29&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;View on Product Hunt&lt;/a&gt;&lt;br&gt;
&lt;img src=&#34;https://ph-files.imgix.net/b4955206-9405-4fbb-a55b-28ae15e6a5e5.jpeg?auto=format&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;Pylon&#34;
	
	
&gt;&lt;br&gt;
&lt;strong&gt;Keyword&lt;/strong&gt;: B2B support, AI support, ticketing, chat, knowledge base, account intelligence, support platform, AI-native&lt;br&gt;
&lt;strong&gt;VotesCount&lt;/strong&gt;: 🔺138&lt;br&gt;
&lt;strong&gt;Featured&lt;/strong&gt;: Yes&lt;br&gt;
&lt;strong&gt;CreatedAt&lt;/strong&gt;: 2025-10-20 07:01 AM (UTC)&lt;/p&gt;
&lt;hr&gt;
&lt;h2 id=&#34;8-app2dev&#34;&gt;8. App2.dev
&lt;/h2&gt;&lt;p&gt;&lt;strong&gt;Tagline&lt;/strong&gt;: Turn ideas &amp;amp; Figma designs into complete web &amp;amp; mobile apps&lt;br&gt;
&lt;strong&gt;Description&lt;/strong&gt;: Turn your ideas &amp;amp; Figma designs into web &amp;amp; mobile apps in minutes with backend, database, and authentication - all powered by AI.&lt;br&gt;
&lt;strong&gt;Website&lt;/strong&gt;: &lt;a class=&#34;link&#34; href=&#34;https://www.producthunt.com/r/JWOK7RUANFXLZY?utm_campaign=producthunt-api&amp;amp;utm_medium=api-v2&amp;amp;utm_source=Application%3A&amp;#43;weekly&amp;#43;%28ID%3A&amp;#43;148189%29&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;open&lt;/a&gt;&lt;br&gt;
&lt;strong&gt;Product Hunt&lt;/strong&gt;: &lt;a class=&#34;link&#34; href=&#34;https://www.producthunt.com/products/app2-dev?utm_campaign=producthunt-api&amp;amp;utm_medium=api-v2&amp;amp;utm_source=Application%3A&amp;#43;weekly&amp;#43;%28ID%3A&amp;#43;148189%29&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;View on Product Hunt&lt;/a&gt;&lt;br&gt;
&lt;img src=&#34;https://ph-files.imgix.net/4303b056-9478-4598-8b41-cfb83162495c.png?auto=format&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;App2.dev&#34;
	
	
&gt;&lt;br&gt;
&lt;strong&gt;Keyword&lt;/strong&gt;: App development, Figma to app, web app, mobile app, AI, no-code, backend, database, authentication, rapid development&lt;br&gt;
&lt;strong&gt;VotesCount&lt;/strong&gt;: 🔺114&lt;br&gt;
&lt;strong&gt;Featured&lt;/strong&gt;: Yes&lt;br&gt;
&lt;strong&gt;CreatedAt&lt;/strong&gt;: 2025-10-20 07:01 AM (UTC)&lt;/p&gt;
&lt;hr&gt;
&lt;h2 id=&#34;9-aden-ai&#34;&gt;9. Aden AI
&lt;/h2&gt;&lt;p&gt;&lt;strong&gt;Tagline&lt;/strong&gt;: Turn any file into a chatbot course &amp;amp; get certified with AI&lt;br&gt;
&lt;strong&gt;Description&lt;/strong&gt;: We built the Aden Training Agent - it transforms any file or manual into an interactive AI course for workforce training or certification. Try our Mindfulness Agent that teaches focus under pressure, or upload your own file to create a smart, adaptive course.&lt;br&gt;
&lt;strong&gt;Website&lt;/strong&gt;: &lt;a class=&#34;link&#34; href=&#34;https://www.producthunt.com/r/H4JP7VGQUIIIDS?utm_campaign=producthunt-api&amp;amp;utm_medium=api-v2&amp;amp;utm_source=Application%3A&amp;#43;weekly&amp;#43;%28ID%3A&amp;#43;148189%29&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;open&lt;/a&gt;&lt;br&gt;
&lt;strong&gt;Product Hunt&lt;/strong&gt;: &lt;a class=&#34;link&#34; href=&#34;https://www.producthunt.com/products/ai-powered-form-that-fills-itself?utm_campaign=producthunt-api&amp;amp;utm_medium=api-v2&amp;amp;utm_source=Application%3A&amp;#43;weekly&amp;#43;%28ID%3A&amp;#43;148189%29&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;View on Product Hunt&lt;/a&gt;&lt;br&gt;
&lt;img src=&#34;https://ph-files.imgix.net/efb99060-26a3-47fb-8cad-558e3118c08d.png?auto=format&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;Aden AI&#34;
	
	
&gt;&lt;br&gt;
&lt;strong&gt;Keyword&lt;/strong&gt;: AI chatbot course, file to course, workforce training, AI certification, adaptive learning, Mindfulness Agent, training agent, smart course&lt;br&gt;
&lt;strong&gt;VotesCount&lt;/strong&gt;: 🔺104&lt;br&gt;
&lt;strong&gt;Featured&lt;/strong&gt;: Yes&lt;br&gt;
&lt;strong&gt;CreatedAt&lt;/strong&gt;: 2025-10-20 07:01 AM (UTC)&lt;/p&gt;
&lt;hr&gt;
&lt;h2 id=&#34;10-vibeonly&#34;&gt;10. VibeOnly
&lt;/h2&gt;&lt;p&gt;&lt;strong&gt;Tagline&lt;/strong&gt;: Helping companies screen and hire AI-fluent employees&lt;br&gt;
&lt;strong&gt;Description&lt;/strong&gt;: Everyone says &amp;ldquo;AI won’t take your job. People who use it will&amp;rdquo;. Vibeonly helps you hire those people. It’s a test that shows who really knows how to use AI tools really well. Perfect for founders and hiring managers who want elite AI fluent talent.&lt;br&gt;
&lt;strong&gt;Website&lt;/strong&gt;: &lt;a class=&#34;link&#34; href=&#34;https://www.producthunt.com/r/7LP6JGJC5IXNPT?utm_campaign=producthunt-api&amp;amp;utm_medium=api-v2&amp;amp;utm_source=Application%3A&amp;#43;weekly&amp;#43;%28ID%3A&amp;#43;148189%29&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;open&lt;/a&gt;&lt;br&gt;
&lt;strong&gt;Product Hunt&lt;/strong&gt;: &lt;a class=&#34;link&#34; href=&#34;https://www.producthunt.com/products/vibeonly?utm_campaign=producthunt-api&amp;amp;utm_medium=api-v2&amp;amp;utm_source=Application%3A&amp;#43;weekly&amp;#43;%28ID%3A&amp;#43;148189%29&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;View on Product Hunt&lt;/a&gt;&lt;br&gt;
&lt;img src=&#34;https://ph-files.imgix.net/ec9cc838-b83d-4c33-995b-fc03c39ec778.png?auto=format&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;VibeOnly&#34;
	
	
&gt;&lt;br&gt;
&lt;strong&gt;Keyword&lt;/strong&gt;: AI hiring, AI fluency, employee screening, AI talent, hiring, AI tools, VibeOnly, talent acquisition&lt;br&gt;
&lt;strong&gt;VotesCount&lt;/strong&gt;: 🔺100&lt;br&gt;
&lt;strong&gt;Featured&lt;/strong&gt;: Yes&lt;br&gt;
&lt;strong&gt;CreatedAt&lt;/strong&gt;: 2025-10-20 07:01 AM (UTC)&lt;/p&gt;
&lt;hr&gt;
</description>
        </item>
        <item>
        <title>Duix.Heygem</title>
        <link>https://producthunt.programnotes.cn/en/p/duix.heygem/</link>
        <pubDate>Wed, 28 May 2025 15:29:52 +0800</pubDate>
        
        <guid>https://producthunt.programnotes.cn/en/p/duix.heygem/</guid>
        <description>&lt;img src="https://images.unsplash.com/photo-1694248607966-7e6ac34c6e56?ixid=M3w0NjAwMjJ8MHwxfHJhbmRvbXx8fHx8fHx8fDE3NDg0MTcyOTd8&amp;ixlib=rb-4.1.0" alt="Featured image of post Duix.Heygem" /&gt;&lt;h1 id=&#34;duixcomduixheygem&#34;&gt;&lt;a class=&#34;link&#34; href=&#34;https://github.com/duixcom/Duix.Heygem&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;duixcom/Duix.Heygem&lt;/a&gt;
&lt;/h1&gt;&lt;div align=&#34;center&#34;&gt;
  &lt;img src=&#34;README.assets/1.png&#34; style=&#34;width: 220px; height: auto;&#34;/&gt;
&lt;/div&gt;
&lt;div align=&#34;center&#34;&gt;
  &lt;h1&gt;HeyGem - Open Source Alternative to Heygen&lt;/h1&gt;
&lt;/div&gt;
&lt;h1 id=&#34;table-of-contents&#34;&gt;Table of Contents
&lt;/h1&gt;&lt;ol&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#1-whats-heygem&#34; &gt;What&amp;rsquo;s HeyGem&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#2-introduction&#34; &gt;Introduction&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#3-how-to-run-locally&#34; &gt;How to Run Locally&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#4-open-apis&#34; &gt;Open APIs&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#5-whats-new&#34; &gt;What&amp;rsquo;s New&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#6-faq&#34; &gt;FAQ&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#7-how-to-interact-in-real-time&#34; &gt;How to Interact in real time&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#8-contact&#34; &gt;Contact&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#9-license&#34; &gt;License&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#10-acknowledgments&#34; &gt;Acknowledgments&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#11-star-history&#34; &gt;Star History&lt;/a&gt;&lt;/li&gt;
&lt;/ol&gt;
&lt;hr&gt;
&lt;h2 id=&#34;1-whats-heygem&#34;&gt;1. What&amp;rsquo;s HeyGem
&lt;/h2&gt;&lt;p&gt;&lt;strong&gt;HeyGem&lt;/strong&gt; is a free and open-source AI avatar project developed by &lt;strong&gt;Duix.com&lt;/strong&gt;.&lt;/p&gt;
&lt;p&gt;Seven years ago, a group of young pioneers chose an unconventional technical path, developing a method to train digital human models using real-person video data. Unlike traditional costly 3D digital human approaches, we leveraged AI-generated technology to create ultra-realistic digital humans, slashing production costs from hundreds of thousands of dollars to just $1,000. This innovation has empowered over 10,000 enterprises and generated over 500,000 personalized avatars for professionals across fields – educators, content creators, legal experts, medical practitioners, and entrepreneurs – dramatically enhancing their video production efficiency. However, our vision extends beyond commercial applications. We believe this transformative technology should be accessible to everyone. To democratize digital human creation, we&amp;rsquo;ve open-sourced our cloning technology and video production framework. Our commitment remains: breaking down technological barriers to make cutting-edge tools available to all. Now, anyone with a computer can freely craft their own AI Avatar and produce videos at zero cost – this is the essence of  &lt;strong&gt;HeyGem&lt;/strong&gt;.&lt;/p&gt;
&lt;h2 id=&#34;2-introduction&#34;&gt;2. Introduction
&lt;/h2&gt;&lt;p&gt;&lt;img src=&#34;https://producthunt.programnotes.cn/README.assets/2.png&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;img&#34;
	
	
&gt;&lt;/p&gt;
&lt;p&gt;Heygem is a fully offline video synthesis tool designed for Windows systems that can precisely clone your appearance and voice, digitalizing your image. You can create videos by driving virtual avatars through text and voice. No internet connection is required, protecting your privacy while enjoying convenient and efficient digital experiences.&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;Core Features
&lt;ul&gt;
&lt;li&gt;Precise Appearance and Voice Cloning: Using advanced AI algorithms to capture human facial features with high precision, including facial features, contours, etc., to build realistic virtual models. It can also precisely clone voices, capturing and reproducing subtle characteristics of human voices, supporting various voice parameter settings to create highly similar cloning effects.&lt;/li&gt;
&lt;li&gt;Text and Voice-Driven Virtual Avatars: Understanding text content through natural language processing technology, converting text into natural and fluent speech to drive virtual avatars. Voice input can also be used directly, allowing virtual avatars to perform corresponding actions and facial expressions based on the rhythm and intonation of the voice, making the virtual avatar&amp;rsquo;s performance more natural and vivid.&lt;/li&gt;
&lt;li&gt;Efficient Video Synthesis: Highly synchronizing digital human video images with sound, achieving natural and smooth lip-syncing, intelligently optimizing audio-video synchronization effects.&lt;/li&gt;
&lt;li&gt;Multi-language Support: Scripts support eight languages - English, Japanese, Korean, Chinese, French, German, Arabic, and Spanish.&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;Key Advantages
&lt;ul&gt;
&lt;li&gt;Fully Offline Operation: No internet connection required, effectively protecting user privacy, allowing users to create in a secure, independent environment, avoiding potential data leaks during network transmission.&lt;/li&gt;
&lt;li&gt;User-Friendly: Clean and intuitive interface, easy to use even for beginners with no technical background, quickly mastering the software&amp;rsquo;s usage to start their digital human creation journey.&lt;/li&gt;
&lt;li&gt;Multiple Model Support: Supports importing multiple models and managing them through one-click startup packages, making it convenient for users to choose suitable models based on different creative needs and application scenarios.&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;Technical Support
&lt;ul&gt;
&lt;li&gt;Voice Cloning Technology: Using advanced technologies like artificial intelligence to generate similar or identical voices based on given voice samples, covering context, intonation, speed, and other aspects of speech.&lt;/li&gt;
&lt;li&gt;Automatic Speech Recognition: Technology that converts human speech vocabulary content into computer-readable input (text format), enabling computers to &amp;ldquo;understand&amp;rdquo; human speech.&lt;/li&gt;
&lt;li&gt;Computer Vision Technology: Used in video synthesis for visual processing, including facial recognition and lip movement analysis, ensuring virtual avatar lip movements match voice and text content.&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;h2 id=&#34;3-how-to-run-locally&#34;&gt;3. How to Run Locally
&lt;/h2&gt;&lt;p&gt;HeyGem supports Docker-based rapid deployment. Prior to deployment, ensure your hardware and software environments meet the specified requirements.&lt;/p&gt;
&lt;p&gt;HeyGem supports two deployment modes: Windows / Ubuntu 22.04 Installation&lt;/p&gt;
&lt;h3 id=&#34;dependencies&#34;&gt;&lt;strong&gt;Dependencies&lt;/strong&gt;
&lt;/h3&gt;&lt;ol&gt;
&lt;li&gt;Nodejs 18&lt;/li&gt;
&lt;li&gt;Docker Images
&lt;ul&gt;
&lt;li&gt;docker pull guiji2025/fun-asr&lt;/li&gt;
&lt;li&gt;docker pull guiji2025/fish-speech-ziming&lt;/li&gt;
&lt;li&gt;docker pull guiji2025/heygem.ai&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ol&gt;
&lt;h3 id=&#34;mode-1windows-installation&#34;&gt;Mode 1: Windows Installation
&lt;/h3&gt;&lt;p&gt;&lt;strong&gt;System Requirements:&lt;/strong&gt;&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;Currently supports Windows 10 19042.1526 or higher&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;strong&gt;Hardware Requirements:&lt;/strong&gt;&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;
&lt;p&gt;Must have D Drive: Mainly used for storing digital human and project data&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;Free space requirement: More than 30GB&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;C Drive: Used for storing service image files&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;
&lt;p&gt;Free space requirement: More than 100GB&lt;/p&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;If less than 100GB is available, after installing Docker, you can choose a different disk folder with more than 100GB of remaining space at the location shown below.&lt;/p&gt;
&lt;p&gt;&lt;img src=&#34;https://producthunt.programnotes.cn/README.assets/7.png&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;img&#34;
	
	
&gt;&lt;/p&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;Recommended Configuration:&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;CPU: 13th Gen Intel Core i5-13400F&lt;/li&gt;
&lt;li&gt;Memory: 32GB&lt;/li&gt;
&lt;li&gt;Graphics Card: RTX 4070&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;Ensure you have an NVIDIA graphics card with properly installed drivers&lt;/p&gt;
&lt;blockquote&gt;
&lt;p&gt;NVIDIA driver download link: &lt;a class=&#34;link&#34; href=&#34;https://www.nvidia.cn/drivers/lookup/&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;https://www.nvidia.cn/drivers/lookup/&lt;/a&gt;&lt;/p&gt;
&lt;/blockquote&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;h4 id=&#34;installing-windows-docker&#34;&gt;&lt;strong&gt;Installing Windows Docker&lt;/strong&gt;
&lt;/h4&gt;&lt;ol&gt;
&lt;li&gt;
&lt;p&gt;Use the command &lt;code&gt;wsl --list --verbose&lt;/code&gt; to check if WSL is installed. If it shows as below, it&amp;rsquo;s already installed and no further installation is needed.&lt;/p&gt;
&lt;p&gt;&lt;img src=&#34;https://producthunt.programnotes.cn/README.assets/11.png&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;img&#34;
	
	
&gt;&lt;/p&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;Update WSL using &lt;code&gt;wsl --update&lt;/code&gt;.&lt;/p&gt;
&lt;p&gt;&lt;img src=&#34;https://producthunt.programnotes.cn/README.assets/10.png&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;img&#34;
	
	
&gt;&lt;/p&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;&lt;a class=&#34;link&#34; href=&#34;https://www.docker.com/&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;Download Docker for Windows&lt;/a&gt;, choose the appropriate installation package based on your CPU architecture.&lt;/p&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;When you see this interface, installation is successful.&lt;/p&gt;
&lt;p&gt;&lt;img src=&#34;https://producthunt.programnotes.cn/README.assets/5.png&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;img&#34;
	
	
&gt;&lt;/p&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;Run Docker&lt;/p&gt;
&lt;p&gt;&lt;img src=&#34;https://producthunt.programnotes.cn/README.assets/12.png&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;img&#34;
	
	
&gt;&lt;/p&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;Accept the agreement and skip login on first run&lt;/p&gt;
&lt;p&gt;&lt;img src=&#34;https://producthunt.programnotes.cn/README.assets/8.png&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;img&#34;
	
	
&gt;&lt;/p&gt;
&lt;p&gt;&lt;img src=&#34;https://producthunt.programnotes.cn/README.assets/13.png&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;img&#34;
	
	
&gt;&lt;/p&gt;
&lt;p&gt;&lt;img src=&#34;https://producthunt.programnotes.cn/README.assets/3.png&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;img&#34;
	
	
&gt;&lt;/p&gt;
&lt;/li&gt;
&lt;/ol&gt;
&lt;h4 id=&#34;installing-the-server&#34;&gt;&lt;strong&gt;Installing the Server&lt;/strong&gt;
&lt;/h4&gt;&lt;p&gt;Installation using Docker, docker-compose as follows:&lt;/p&gt;
&lt;ol&gt;
&lt;li&gt;
&lt;p&gt;The &lt;code&gt;docker-compose.yml&lt;/code&gt; file is in the &lt;code&gt;/deploy&lt;/code&gt; directory.&lt;/p&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;Execute &lt;code&gt;docker-compose up -d&lt;/code&gt; in the &lt;code&gt;/deploy&lt;/code&gt; directory, if you want to use the lite version, execute &lt;code&gt;docker-compose -f docker-compose-lite.yml up -d&lt;/code&gt;&lt;/p&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;Wait patiently (about half an hour, speed depends on network), download will consume about 70GB of traffic, make sure to use WiFi&lt;/p&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;When you see three services in Docker, it indicates success (the lite version has only one service &lt;code&gt;heygem-gen-video&lt;/code&gt;)&lt;/p&gt;
&lt;p&gt;&lt;img src=&#34;https://producthunt.programnotes.cn/README.assets/6.png&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;img&#34;
	
	
&gt;&lt;/p&gt;
&lt;/li&gt;
&lt;/ol&gt;
&lt;h4 id=&#34;server-deployment-solution-for-nvidia-50-series-graphics-cards&#34;&gt;&lt;strong&gt;Server Deployment Solution for NVIDIA 50 Series Graphics Cards&lt;/strong&gt;
&lt;/h4&gt;&lt;p&gt;For 50 series graphics cards (tested; also works for 30/40 series with CUDA 12.8), use the official preview version of PyTorch.&lt;/p&gt;
&lt;h4 id=&#34;client&#34;&gt;&lt;strong&gt;Client&lt;/strong&gt;
&lt;/h4&gt;&lt;ol&gt;
&lt;li&gt;Directly download the &lt;a class=&#34;link&#34; href=&#34;https://github.com/GuijiAI/HeyGem.ai/releases&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;officially built installation package&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;Double-click &lt;code&gt;HeyGem-x.x.x-setup.exe&lt;/code&gt; to install&lt;/li&gt;
&lt;/ol&gt;
&lt;h3 id=&#34;mode-2ubuntu-2204-installation&#34;&gt;Mode 2：Ubuntu 22.04 Installation
&lt;/h3&gt;&lt;p&gt;&lt;strong&gt;System Requirements：&lt;/strong&gt;&lt;/p&gt;
&lt;p&gt;We have conducted a complete test on &lt;strong&gt;Ubuntu 22.04&lt;/strong&gt;. However, it should theoretically support other desktop Linux distributions as well.&lt;/p&gt;
&lt;p&gt;&lt;strong&gt;Hardware Requirements：&lt;/strong&gt;&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;Recommended Configuration&lt;/li&gt;
&lt;li&gt;CPU: 13th Generation Intel Core i5 - 13400F&lt;/li&gt;
&lt;li&gt;Memory: 32G or more (necessary)&lt;/li&gt;
&lt;li&gt;Graphics Card: RTX - 4070 (Ensure you have an NVIDIA graphics card and the graphics card driver is correctly installed)&lt;/li&gt;
&lt;li&gt;Hard Disk: Free space greater than 100G&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;strong&gt;Install Docker:&lt;/strong&gt;&lt;/p&gt;
&lt;p&gt;First, use &lt;code&gt;docker --version&lt;/code&gt; to check if Docker is installed. If it is installed, skip the following steps.&lt;/p&gt;
&lt;div class=&#34;highlight&#34;&gt;&lt;div class=&#34;chroma&#34;&gt;
&lt;table class=&#34;lntable&#34;&gt;&lt;tr&gt;&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code&gt;&lt;span class=&#34;lnt&#34;&gt;1
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;2
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;3
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;
&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code class=&#34;language-fallback&#34; data-lang=&#34;fallback&#34;&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;sudo apt update
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;sudo apt install docker.io
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;sudo apt install docker-compose
&lt;/span&gt;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;
&lt;/div&gt;
&lt;/div&gt;&lt;p&gt;&lt;strong&gt;Install the graphics card driver:&lt;/strong&gt;&lt;/p&gt;
&lt;ol&gt;
&lt;li&gt;Install the graphics card driver by referring to the official documentation (&lt;a class=&#34;link&#34; href=&#34;https://www.nvidia.cn/drivers/lookup/&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;https://www.nvidia.cn/drivers/lookup/&lt;/a&gt;).&lt;/li&gt;
&lt;/ol&gt;
&lt;p&gt;After installation, execute the &lt;code&gt;nvidia-smi&lt;/code&gt; command. If the graphics card information is displayed, the installation is successful.&lt;/p&gt;
&lt;ol start=&#34;2&#34;&gt;
&lt;li&gt;Install the NVIDIA Container Toolkit&lt;/li&gt;
&lt;/ol&gt;
&lt;p&gt;The NVIDIA Container Toolkit is a necessary tool for Docker to use NVIDIA GPUs. The installation steps are as follows:&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;Add the NVIDIA package repository:&lt;/li&gt;
&lt;/ul&gt;
&lt;div class=&#34;highlight&#34;&gt;&lt;div class=&#34;chroma&#34;&gt;
&lt;table class=&#34;lntable&#34;&gt;&lt;tr&gt;&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code&gt;&lt;span class=&#34;lnt&#34;&gt;1
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;2
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;3
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;
&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code class=&#34;language-gdscript3&#34; data-lang=&#34;gdscript3&#34;&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;&lt;span class=&#34;n&#34;&gt;distribution&lt;/span&gt;&lt;span class=&#34;o&#34;&gt;=$&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;(&lt;/span&gt;&lt;span class=&#34;o&#34;&gt;.&lt;/span&gt; &lt;span class=&#34;o&#34;&gt;/&lt;/span&gt;&lt;span class=&#34;n&#34;&gt;etc&lt;/span&gt;&lt;span class=&#34;o&#34;&gt;/&lt;/span&gt;&lt;span class=&#34;n&#34;&gt;os&lt;/span&gt;&lt;span class=&#34;o&#34;&gt;-&lt;/span&gt;&lt;span class=&#34;n&#34;&gt;release&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;;&lt;/span&gt;&lt;span class=&#34;n&#34;&gt;echo&lt;/span&gt; &lt;span class=&#34;o&#34;&gt;$&lt;/span&gt;&lt;span class=&#34;n&#34;&gt;ID&lt;/span&gt;&lt;span class=&#34;o&#34;&gt;$&lt;/span&gt;&lt;span class=&#34;n&#34;&gt;VERSION_ID&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;)&lt;/span&gt; \
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &lt;span class=&#34;o&#34;&gt;&amp;amp;&amp;amp;&lt;/span&gt; &lt;span class=&#34;n&#34;&gt;curl&lt;/span&gt; &lt;span class=&#34;o&#34;&gt;-&lt;/span&gt;&lt;span class=&#34;n&#34;&gt;s&lt;/span&gt; &lt;span class=&#34;o&#34;&gt;-&lt;/span&gt;&lt;span class=&#34;n&#34;&gt;L&lt;/span&gt; &lt;span class=&#34;n&#34;&gt;https&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;:&lt;/span&gt;&lt;span class=&#34;o&#34;&gt;//&lt;/span&gt;&lt;span class=&#34;n&#34;&gt;nvidia&lt;/span&gt;&lt;span class=&#34;o&#34;&gt;.&lt;/span&gt;&lt;span class=&#34;n&#34;&gt;github&lt;/span&gt;&lt;span class=&#34;o&#34;&gt;.&lt;/span&gt;&lt;span class=&#34;n&#34;&gt;io&lt;/span&gt;&lt;span class=&#34;o&#34;&gt;/&lt;/span&gt;&lt;span class=&#34;n&#34;&gt;libnvidia&lt;/span&gt;&lt;span class=&#34;o&#34;&gt;-&lt;/span&gt;&lt;span class=&#34;n&#34;&gt;container&lt;/span&gt;&lt;span class=&#34;o&#34;&gt;/&lt;/span&gt;&lt;span class=&#34;n&#34;&gt;gpgkey&lt;/span&gt; &lt;span class=&#34;o&#34;&gt;|&lt;/span&gt; &lt;span class=&#34;n&#34;&gt;sudo&lt;/span&gt; &lt;span class=&#34;n&#34;&gt;apt&lt;/span&gt;&lt;span class=&#34;o&#34;&gt;-&lt;/span&gt;&lt;span class=&#34;n&#34;&gt;key&lt;/span&gt; &lt;span class=&#34;n&#34;&gt;add&lt;/span&gt; &lt;span class=&#34;o&#34;&gt;-&lt;/span&gt; \
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &lt;span class=&#34;o&#34;&gt;&amp;amp;&amp;amp;&lt;/span&gt; &lt;span class=&#34;n&#34;&gt;curl&lt;/span&gt; &lt;span class=&#34;o&#34;&gt;-&lt;/span&gt;&lt;span class=&#34;n&#34;&gt;s&lt;/span&gt; &lt;span class=&#34;o&#34;&gt;-&lt;/span&gt;&lt;span class=&#34;n&#34;&gt;L&lt;/span&gt; &lt;span class=&#34;n&#34;&gt;https&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;:&lt;/span&gt;&lt;span class=&#34;o&#34;&gt;//&lt;/span&gt;&lt;span class=&#34;n&#34;&gt;nvidia&lt;/span&gt;&lt;span class=&#34;o&#34;&gt;.&lt;/span&gt;&lt;span class=&#34;n&#34;&gt;github&lt;/span&gt;&lt;span class=&#34;o&#34;&gt;.&lt;/span&gt;&lt;span class=&#34;n&#34;&gt;io&lt;/span&gt;&lt;span class=&#34;o&#34;&gt;/&lt;/span&gt;&lt;span class=&#34;n&#34;&gt;libnvidia&lt;/span&gt;&lt;span class=&#34;o&#34;&gt;-&lt;/span&gt;&lt;span class=&#34;n&#34;&gt;container&lt;/span&gt;&lt;span class=&#34;o&#34;&gt;/$&lt;/span&gt;&lt;span class=&#34;n&#34;&gt;distribution&lt;/span&gt;&lt;span class=&#34;o&#34;&gt;/&lt;/span&gt;&lt;span class=&#34;n&#34;&gt;libnvidia&lt;/span&gt;&lt;span class=&#34;o&#34;&gt;-&lt;/span&gt;&lt;span class=&#34;n&#34;&gt;container&lt;/span&gt;&lt;span class=&#34;o&#34;&gt;.&lt;/span&gt;&lt;span class=&#34;n&#34;&gt;list&lt;/span&gt; &lt;span class=&#34;o&#34;&gt;|&lt;/span&gt; &lt;span class=&#34;n&#34;&gt;sudo&lt;/span&gt; &lt;span class=&#34;n&#34;&gt;tee&lt;/span&gt; &lt;span class=&#34;o&#34;&gt;/&lt;/span&gt;&lt;span class=&#34;n&#34;&gt;etc&lt;/span&gt;&lt;span class=&#34;o&#34;&gt;/&lt;/span&gt;&lt;span class=&#34;n&#34;&gt;apt&lt;/span&gt;&lt;span class=&#34;o&#34;&gt;/&lt;/span&gt;&lt;span class=&#34;n&#34;&gt;sources&lt;/span&gt;&lt;span class=&#34;o&#34;&gt;.&lt;/span&gt;&lt;span class=&#34;n&#34;&gt;list&lt;/span&gt;&lt;span class=&#34;o&#34;&gt;.&lt;/span&gt;&lt;span class=&#34;n&#34;&gt;d&lt;/span&gt;&lt;span class=&#34;o&#34;&gt;/&lt;/span&gt;&lt;span 
class=&#34;n&#34;&gt;nvidia&lt;/span&gt;&lt;span class=&#34;o&#34;&gt;-&lt;/span&gt;&lt;span class=&#34;n&#34;&gt;container&lt;/span&gt;&lt;span class=&#34;o&#34;&gt;-&lt;/span&gt;&lt;span class=&#34;n&#34;&gt;toolkit&lt;/span&gt;&lt;span class=&#34;o&#34;&gt;.&lt;/span&gt;&lt;span class=&#34;n&#34;&gt;list&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;
&lt;/div&gt;
&lt;/div&gt;&lt;ul&gt;
&lt;li&gt;Update the package list and install the toolkit:&lt;/li&gt;
&lt;/ul&gt;
&lt;div class=&#34;highlight&#34;&gt;&lt;div class=&#34;chroma&#34;&gt;
&lt;table class=&#34;lntable&#34;&gt;&lt;tr&gt;&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code&gt;&lt;span class=&#34;lnt&#34;&gt;1
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;2
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;
&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code class=&#34;language-gdscript3&#34; data-lang=&#34;gdscript3&#34;&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;&lt;span class=&#34;n&#34;&gt;sudo&lt;/span&gt; &lt;span class=&#34;n&#34;&gt;apt&lt;/span&gt;&lt;span class=&#34;o&#34;&gt;-&lt;/span&gt;&lt;span class=&#34;n&#34;&gt;get&lt;/span&gt; &lt;span class=&#34;n&#34;&gt;update&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;&lt;span class=&#34;n&#34;&gt;sudo&lt;/span&gt; &lt;span class=&#34;n&#34;&gt;apt&lt;/span&gt;&lt;span class=&#34;o&#34;&gt;-&lt;/span&gt;&lt;span class=&#34;n&#34;&gt;get&lt;/span&gt; &lt;span class=&#34;n&#34;&gt;install&lt;/span&gt; &lt;span class=&#34;o&#34;&gt;-&lt;/span&gt;&lt;span class=&#34;n&#34;&gt;y&lt;/span&gt; &lt;span class=&#34;n&#34;&gt;nvidia&lt;/span&gt;&lt;span class=&#34;o&#34;&gt;-&lt;/span&gt;&lt;span class=&#34;n&#34;&gt;container&lt;/span&gt;&lt;span class=&#34;o&#34;&gt;-&lt;/span&gt;&lt;span class=&#34;n&#34;&gt;toolkit&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;
&lt;/div&gt;
&lt;/div&gt;&lt;ul&gt;
&lt;li&gt;Configure Docker to use the NVIDIA runtime:&lt;/li&gt;
&lt;/ul&gt;
&lt;div class=&#34;highlight&#34;&gt;&lt;div class=&#34;chroma&#34;&gt;
&lt;table class=&#34;lntable&#34;&gt;&lt;tr&gt;&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code&gt;&lt;span class=&#34;lnt&#34;&gt;1
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;
&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code class=&#34;language-fallback&#34; data-lang=&#34;fallback&#34;&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;sudo nvidia-ctk runtime configure --runtime=docker
&lt;/span&gt;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;
&lt;/div&gt;
&lt;/div&gt;&lt;ul&gt;
&lt;li&gt;Restart the Docker service:&lt;/li&gt;
&lt;/ul&gt;
&lt;div class=&#34;highlight&#34;&gt;&lt;div class=&#34;chroma&#34;&gt;
&lt;table class=&#34;lntable&#34;&gt;&lt;tr&gt;&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code&gt;&lt;span class=&#34;lnt&#34;&gt;1
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;
&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code class=&#34;language-fallback&#34; data-lang=&#34;fallback&#34;&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;sudo systemctl restart docker
&lt;/span&gt;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;
&lt;/div&gt;
&lt;/div&gt;&lt;h4 id=&#34;install-the-server&#34;&gt;&lt;strong&gt;Install the server&lt;/strong&gt;
&lt;/h4&gt;&lt;div class=&#34;highlight&#34;&gt;&lt;div class=&#34;chroma&#34;&gt;
&lt;table class=&#34;lntable&#34;&gt;&lt;tr&gt;&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code&gt;&lt;span class=&#34;lnt&#34;&gt;1
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;2
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;
&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code class=&#34;language-fallback&#34; data-lang=&#34;fallback&#34;&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;cd /deploy
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;docker-compose -f docker-compose-linux.yml up -d
&lt;/span&gt;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;
&lt;/div&gt;
&lt;/div&gt;&lt;h4 id=&#34;install-the-client&#34;&gt;&lt;strong&gt;Install the client&lt;/strong&gt;
&lt;/h4&gt;&lt;ol&gt;
&lt;li&gt;Directly download the Linux version of the &lt;a class=&#34;link&#34; href=&#34;https://github.com/GuijiAI/HeyGem.ai/releases&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;officially built installation package&lt;/a&gt;.&lt;/li&gt;
&lt;li&gt;Double-click &lt;code&gt;HeyGem-x.x.x.AppImage&lt;/code&gt; to launch it. No installation is required.&lt;/li&gt;
&lt;/ol&gt;
&lt;p&gt;Reminder: In the Ubuntu system, if you enter the desktop as the &lt;code&gt;root&lt;/code&gt; user, directly double-clicking &lt;code&gt;HeyGem-x.x.x.AppImage&lt;/code&gt; may not work. You need to execute &lt;code&gt;./HeyGem-x.x.x.AppImage --no-sandbox&lt;/code&gt; in the command-line terminal. Adding the &lt;code&gt;--no-sandbox&lt;/code&gt; parameter will do the trick.&lt;/p&gt;
&lt;h2 id=&#34;4-open-apis&#34;&gt;4. Open APIs
&lt;/h2&gt;&lt;p&gt;We have opened APIs for model training and video synthesis. After Docker starts, several ports will be exposed locally, accessible through &lt;code&gt;http://127.0.0.1&lt;/code&gt;.&lt;/p&gt;
&lt;p&gt;For specific code, refer to:&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;src/main/service/model.js&lt;/li&gt;
&lt;li&gt;src/main/service/video.js&lt;/li&gt;
&lt;li&gt;src/main/service/voice.js&lt;/li&gt;
&lt;/ul&gt;
&lt;h3 id=&#34;model-training&#34;&gt;&lt;strong&gt;Model Training&lt;/strong&gt;
&lt;/h3&gt;&lt;ol&gt;
&lt;li&gt;
&lt;p&gt;Separate video into silent video + audio&lt;/p&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;Place audio in&lt;/p&gt;
&lt;p&gt;&lt;code&gt;D:\heygem_data\voice\data&lt;/code&gt; is agreed with the &lt;code&gt;guiji2025/fish-speech-ziming&lt;/code&gt; service, can be modified in docker-compose&lt;/p&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;Call the&lt;/p&gt;
&lt;p&gt;Parameter example / response example: &lt;strong&gt;Record the response results, as they will be needed for subsequent audio synthesis.&lt;/strong&gt;&lt;/p&gt;
&lt;/li&gt;
&lt;/ol&gt;
&lt;h3 id=&#34;audio-synthesis&#34;&gt;&lt;strong&gt;Audio Synthesis&lt;/strong&gt;
&lt;/h3&gt;&lt;p&gt;Interface: &lt;code&gt;http://127.0.0.1:18180/v1/invoke&lt;/code&gt;&lt;/p&gt;
&lt;div class=&#34;highlight&#34;&gt;&lt;div class=&#34;chroma&#34;&gt;
&lt;table class=&#34;lntable&#34;&gt;&lt;tr&gt;&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code&gt;&lt;span class=&#34;lnt&#34;&gt; 1
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt; 2
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt; 3
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt; 4
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt; 5
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt; 6
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt; 7
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt; 8
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt; 9
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;10
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;11
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;12
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;13
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;14
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;15
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;16
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;17
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;
&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code class=&#34;language-fallback&#34; data-lang=&#34;fallback&#34;&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;// Request parameters
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;{
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &amp;#34;speaker&amp;#34;: &amp;#34;{uuid}&amp;#34;, // A unique UUID
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &amp;#34;text&amp;#34;: &amp;#34;xxxxxxxxxx&amp;#34;, // Text content to synthesize
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &amp;#34;format&amp;#34;: &amp;#34;wav&amp;#34;, // Fixed parameter
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &amp;#34;topP&amp;#34;: 0.7, // Fixed parameter
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &amp;#34;max_new_tokens&amp;#34;: 1024, // Fixed parameter
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &amp;#34;chunk_length&amp;#34;: 100, // Fixed parameter
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &amp;#34;repetition_penalty&amp;#34;: 1.2, // Fixed parameter
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &amp;#34;temperature&amp;#34;: 0.7, // Fixed parameter
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &amp;#34;need_asr&amp;#34;: false, // Fixed parameter
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &amp;#34;streaming&amp;#34;: false, // Fixed parameter
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &amp;#34;is_fixed_seed&amp;#34;: 0, // Fixed parameter
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &amp;#34;is_norm&amp;#34;: 0, // Fixed parameter
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &amp;#34;reference_audio&amp;#34;: &amp;#34;{voice.asr_format_audio_url}&amp;#34;, // Return value from previous &amp;#34;Model Training&amp;#34; step
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &amp;#34;reference_text&amp;#34;: &amp;#34;{voice.reference_audio_text}&amp;#34; // Return value from previous &amp;#34;Model Training&amp;#34; step
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;}
&lt;/span&gt;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;
&lt;/div&gt;
&lt;/div&gt;&lt;h3 id=&#34;video-synthesis&#34;&gt;&lt;strong&gt;Video Synthesis&lt;/strong&gt;
&lt;/h3&gt;&lt;ul&gt;
&lt;li&gt;
&lt;p&gt;Synthesis interface: &lt;code&gt;http://127.0.0.1:8383/easy/submit&lt;/code&gt;&lt;/p&gt;
&lt;div class=&#34;highlight&#34;&gt;&lt;div class=&#34;chroma&#34;&gt;
&lt;table class=&#34;lntable&#34;&gt;&lt;tr&gt;&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code&gt;&lt;span class=&#34;lnt&#34;&gt;1
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;2
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;3
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;4
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;5
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;6
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;7
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;8
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;9
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;
&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code class=&#34;language-fallback&#34; data-lang=&#34;fallback&#34;&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;// Request parameters
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;{
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &amp;#34;audio_url&amp;#34;: &amp;#34;{audioPath}&amp;#34;, // Audio path
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &amp;#34;video_url&amp;#34;: &amp;#34;{videoPath}&amp;#34;, // Video path
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &amp;#34;code&amp;#34;: &amp;#34;{uuid}&amp;#34;, // Unique key
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &amp;#34;chaofen&amp;#34;: 0, // Fixed value
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &amp;#34;watermark_switch&amp;#34;: 0, // Fixed value
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &amp;#34;pn&amp;#34;: 1 // Fixed value
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;}
&lt;/span&gt;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;
&lt;/div&gt;
&lt;/div&gt;&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;Progress query: &lt;code&gt;http://127.0.0.1:8383/easy/query?code=${taskCode}&lt;/code&gt;&lt;/p&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;GET request, the parameter &lt;code&gt;taskCode&lt;/code&gt; is the &lt;code&gt;code&lt;/code&gt; from the synthesis interface input above&lt;/p&gt;
&lt;h3 id=&#34;important-notice-to-developer-partners&#34;&gt;&lt;strong&gt;Important Notice to Developer Partners&lt;/strong&gt;
&lt;/h3&gt;&lt;p&gt;We are now announcing two parallel service solutions:&lt;/p&gt;
&lt;table&gt;
  &lt;thead&gt;
      &lt;tr&gt;
          &lt;th&gt;&lt;strong&gt;Project&lt;/strong&gt;&lt;/th&gt;
          &lt;th&gt;&lt;strong&gt;HeyGem Open Source Local Deployment&lt;/strong&gt;&lt;/th&gt;
          &lt;th&gt;&lt;strong&gt;Digital Human/Clone Voice API Service&lt;/strong&gt;&lt;/th&gt;
      &lt;/tr&gt;
  &lt;/thead&gt;
  &lt;tbody&gt;
      &lt;tr&gt;
          &lt;td&gt;Usage&lt;/td&gt;
          &lt;td&gt;Open Source Local Deployment&lt;/td&gt;
          &lt;td&gt;Rapid Clone API Service&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;Recommended&lt;/td&gt;
          &lt;td&gt;Technical Users&lt;/td&gt;
          &lt;td&gt;Business Users&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;Technical Threshold&lt;/td&gt;
          &lt;td&gt;Developers with deep learning framework experience/pursuing deep customization/wishing to participate in community co-construction&lt;/td&gt;
          &lt;td&gt;Quick business integration/focus on upper-level application development/need enterprise-level SLA assurance for commercial scenarios&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;Hardware Requirements&lt;/td&gt;
          &lt;td&gt;Need to purchase GPU server&lt;/td&gt;
          &lt;td&gt;No need to purchase GPU server&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;Customization&lt;/td&gt;
          &lt;td&gt;Can modify and extend the code according to your needs, fully controlling the software&amp;rsquo;s functions and behavior&lt;/td&gt;
          &lt;td&gt;Cannot directly modify the source code, can only extend functions through API-provided interfaces, less flexible than open source projects&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;Technical Support&lt;/td&gt;
          &lt;td&gt;Community Support&lt;/td&gt;
          &lt;td&gt;Dynamic expansion support + professional technical response team&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;Maintenance Cost&lt;/td&gt;
          &lt;td&gt;High maintenance cost&lt;/td&gt;
          &lt;td&gt;Simple maintenance&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;Lip Sync Effect&lt;/td&gt;
          &lt;td&gt;Usable effect&lt;/td&gt;
          &lt;td&gt;Stunning and higher definition effect&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;Commercial Authorization&lt;/td&gt;
          &lt;td&gt;Supports global free commercial use (enterprises with more than 100,000 users or annual revenue exceeding 10 million USD need to sign a commercial license agreement)&lt;/td&gt;
          &lt;td&gt;Commercial use allowed&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;Iteration Speed&lt;/td&gt;
          &lt;td&gt;Slow updates, bug fixes depend on the community&lt;/td&gt;
          &lt;td&gt;Latest models/algorithms are prioritized, fast problem resolution&lt;/td&gt;
      &lt;/tr&gt;
  &lt;/tbody&gt;
&lt;/table&gt;
&lt;p&gt;We always adhere to the open source spirit, and the launch of the API service aims to provide a more complete solution matrix for developers with different needs. No matter which method you choose, you can always obtain technical support documents through &lt;a class=&#34;link&#34; href=&#34;https://duix.com/&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;https://duix.com&lt;/a&gt;&lt;/p&gt;
&lt;p&gt;We look forward to working with you to promote the inclusive development of digital human technology!&lt;/p&gt;
&lt;p&gt;You can chat with Heygem Digital Human on the official website: &lt;a class=&#34;link&#34; href=&#34;https://duix.com/&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;https://duix.com/&lt;/a&gt;&lt;/p&gt;
&lt;p&gt;We also provide an API at the DUIX Platform: &lt;a class=&#34;link&#34; href=&#34;https://docs.duix.com/api-reference/api/Introduction&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;https://docs.duix.com/api-reference/api/Introduction&lt;/a&gt;&lt;/p&gt;
&lt;h2 id=&#34;5-whats-new&#34;&gt;5. What&amp;rsquo;s New
&lt;/h2&gt;&lt;h3 id=&#34;heading&#34;&gt;&lt;strong&gt;[Nvidia 50 Series GPU Version Notice]&lt;/strong&gt;
&lt;/h3&gt;&lt;ol&gt;
&lt;li&gt;Tested and verified on 5090 GPU&lt;/li&gt;
&lt;li&gt;For installation instructions, see &lt;a class=&#34;link&#34; href=&#34;https://github.com/duixcom/Duix.Heygem?tab=readme-ov-file#Server-Deployment-Solution-for-NVIDIA-50-Series-Graphics-Cards&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;Server Deployment Solution for NVIDIA 50 Series Graphics Cards&lt;/a&gt;&lt;/li&gt;
&lt;/ol&gt;
&lt;h3 id=&#34;heading-1&#34;&gt;&lt;strong&gt;[New Ubuntu Version Notice]&lt;/strong&gt;
&lt;/h3&gt;&lt;p&gt;&lt;strong&gt;Ubuntu Version Officially Released&lt;/strong&gt;&lt;/p&gt;
&lt;ol&gt;
&lt;li&gt;Adaptation and verification work for Ubuntu 22.04 Desktop version (kernel 6.8.0-52-generic) has been completed. Compatibility testing for other Linux versions has not yet been conducted.&lt;/li&gt;
&lt;li&gt;Added internationalization (English) for the client program interface.&lt;/li&gt;
&lt;li&gt;Fixed some known issues
&lt;ul&gt;
&lt;li&gt;#304&lt;/li&gt;
&lt;li&gt;#292&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;https://github.com/GuijiAI/HeyGem.ai?tab=readme-ov-file#ubuntu-2204-installation&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;Ubuntu22.04 Installation Documentation&lt;/a&gt;&lt;/li&gt;
&lt;/ol&gt;
&lt;h2 id=&#34;6-faq&#34;&gt;6. FAQ
&lt;/h2&gt;&lt;h3 id=&#34;self-check-steps-before-asking-questions&#34;&gt;&lt;strong&gt;Self-Check Steps Before Asking Questions&lt;/strong&gt;
&lt;/h3&gt;&lt;ol&gt;
&lt;li&gt;
&lt;p&gt;Check if all three services are in Running status&lt;/p&gt;
&lt;p&gt;&lt;img src=&#34;https://producthunt.programnotes.cn/README.assets/9.png&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;img&#34;
	
	
&gt;&lt;/p&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;Confirm that your machine has an NVIDIA graphics card and drivers are correctly installed.&lt;/p&gt;
&lt;/li&gt;
&lt;/ol&gt;
&lt;p&gt;All computing power for this project is local. The three services won&amp;rsquo;t start without an NVIDIA graphics card or proper drivers.&lt;/p&gt;
&lt;ol start=&#34;3&#34;&gt;
&lt;li&gt;Ensure both server and client are updated to the latest version. The project is newly open-sourced, the community is very active, and updates are frequent. Your issue might have been resolved in a new version.
&lt;ul&gt;
&lt;li&gt;Server: Go to &lt;code&gt;/deploy&lt;/code&gt; directory and re-execute &lt;code&gt;docker-compose up -d&lt;/code&gt;&lt;/li&gt;
&lt;li&gt;Client: &lt;code&gt;pull&lt;/code&gt; code and re-&lt;code&gt;build&lt;/code&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;https://github.com/GuijiAI/HeyGem.ai/issues&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;GitHub Issues&lt;/a&gt; are continuously updated, issues are being resolved and closed daily. Check frequently, your issue might already be resolved.&lt;/li&gt;
&lt;/ol&gt;
&lt;h3 id=&#34;question-template&#34;&gt;&lt;strong&gt;Question Template&lt;/strong&gt;
&lt;/h3&gt;&lt;ol&gt;
&lt;li&gt;Problem Description&lt;/li&gt;
&lt;/ol&gt;
&lt;p&gt;Describe the reproduction steps in detail, with screenshots if possible.&lt;/p&gt;
&lt;ol start=&#34;2&#34;&gt;
&lt;li&gt;Provide Error Logs
&lt;ul&gt;
&lt;li&gt;
&lt;p&gt;How to get client logs:&lt;/p&gt;
&lt;p&gt;&lt;img src=&#34;https://producthunt.programnotes.cn/README.assets/4.jpeg&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;img&#34;
	
	
&gt;&lt;/p&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;Server logs:&lt;/p&gt;
&lt;p&gt;Find the key location, or click on our three Docker services, and &amp;ldquo;Copy&amp;rdquo; as shown below.&lt;/p&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ol&gt;
&lt;h2 id=&#34;7-how-to-interact-in-real-time&#34;&gt;7. How to Interact in real time
&lt;/h2&gt;&lt;p&gt;HeyGem&amp;rsquo;s digital human realizes digital human cloning and non-real-time video synthesis.&lt;/p&gt;
&lt;p&gt;If you want a digital human to support interaction, you can visit &lt;a class=&#34;link&#34; href=&#34;https://www.duix.com&#34; &gt;duix.com&lt;/a&gt; to experience the free test.&lt;/p&gt;
&lt;h2 id=&#34;8-contact&#34;&gt;8. Contact
&lt;/h2&gt;&lt;p&gt;If you have any questions, please raise an issue or contact us at &lt;a class=&#34;link&#34; href=&#34;mailto:james@duix.com&#34; &gt;james@duix.com&lt;/a&gt;&lt;/p&gt;
&lt;h2 id=&#34;9-license&#34;&gt;9. License
&lt;/h2&gt;&lt;p&gt;&lt;a class=&#34;link&#34; href=&#34;https://github.com/GuijiAI/HeyGem.ai/blob/main/LICENSE&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;https://github.com/GuijiAI/HeyGem.ai/blob/main/LICENSE&lt;/a&gt;&lt;/p&gt;
&lt;h2 id=&#34;10-acknowledgments&#34;&gt;10. Acknowledgments
&lt;/h2&gt;&lt;ul&gt;
&lt;li&gt;ASR based on fun-asr&lt;/li&gt;
&lt;li&gt;TTS based on fish-speech-ziming&lt;/li&gt;
&lt;/ul&gt;
&lt;h2 id=&#34;11-star-history&#34;&gt;11. Star History
&lt;/h2&gt;&lt;p&gt;&lt;a class=&#34;link&#34; href=&#34;https://www.star-history.com/#GuijiAI/HeyGem.ai&amp;amp;Date&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;GitHub Star History&lt;/a&gt;&lt;/p&gt;
</description>
        </item>
        <item>
        <title>KrillinAI</title>
        <link>https://producthunt.programnotes.cn/en/p/krillinai/</link>
        <pubDate>Wed, 16 Apr 2025 15:29:17 +0800</pubDate>
        
        <guid>https://producthunt.programnotes.cn/en/p/krillinai/</guid>
        <description>&lt;img src="https://images.unsplash.com/photo-1727175401108-6e8bf73ca114?ixid=M3w0NjAwMjJ8MHwxfHJhbmRvbXx8fHx8fHx8fDE3NDQ3ODg0NTh8&amp;ixlib=rb-4.0.3" alt="Featured image of post KrillinAI" /&gt;&lt;h1 id=&#34;krillinaikrillinai&#34;&gt;&lt;a class=&#34;link&#34; href=&#34;https://github.com/krillinai/KrillinAI&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;krillinai/KrillinAI&lt;/a&gt;
&lt;/h1&gt;&lt;div align=&#34;center&#34;&gt;
  &lt;img src=&#34;./docs/images/logo.png&#34; alt=&#34;KrillinAI&#34; height=&#34;90&#34;&gt;
&lt;h1 id=&#34;ai-audiovideo-translation-and-dubbing-tool&#34;&gt;AI Audio&amp;amp;Video Translation and Dubbing Tool
&lt;/h1&gt;&lt;p&gt;&lt;a href=&#34;https://trendshift.io/repositories/13360&#34; target=&#34;_blank&#34;&gt;&lt;img src=&#34;https://trendshift.io/api/badge/repositories/13360&#34; alt=&#34;krillinai%2FKrillinAI | Trendshift&#34; style=&#34;width: 250px; height: 55px;&#34; width=&#34;250&#34; height=&#34;55&#34;/&gt;&lt;/a&gt;&lt;/p&gt;
&lt;p&gt;&lt;strong&gt;&lt;a class=&#34;link&#34; href=&#34;./README.md&#34; &gt;English&lt;/a&gt;｜&lt;a class=&#34;link&#34; href=&#34;./docs/README_zh.md&#34; &gt;简体中文&lt;/a&gt;｜&lt;a class=&#34;link&#34; href=&#34;./docs/README_jp.md&#34; &gt;日本語&lt;/a&gt;｜&lt;a class=&#34;link&#34; href=&#34;./docs/README_kr.md&#34; &gt;한국어&lt;/a&gt;｜&lt;a class=&#34;link&#34; href=&#34;./docs/README_fr.md&#34; &gt;Français&lt;/a&gt;｜&lt;a class=&#34;link&#34; href=&#34;./docs/README_de.md&#34; &gt;Deutsch&lt;/a&gt;｜&lt;a class=&#34;link&#34; href=&#34;./docs/README_es.md&#34; &gt;Español&lt;/a&gt;｜&lt;a class=&#34;link&#34; href=&#34;./docs/README_pt.md&#34; &gt;Português&lt;/a&gt;｜&lt;a class=&#34;link&#34; href=&#34;./docs/README_rus.md&#34; &gt;Русский&lt;/a&gt;｜&lt;a class=&#34;link&#34; href=&#34;./docs/README_ar.md&#34; &gt;اللغة العربية&lt;/a&gt;&lt;/strong&gt;&lt;/p&gt;
&lt;p&gt;&lt;a class=&#34;link&#34; href=&#34;https://x.com/KrillinAI&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;&lt;img src=&#34;https://img.shields.io/badge/Twitter-KrillinAI-orange?logo=twitter&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;Twitter&#34;
	
	
&gt;&lt;/a&gt;
&lt;a class=&#34;link&#34; href=&#34;https://space.bilibili.com/242124650&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;&lt;img src=&#34;https://img.shields.io/badge/dynamic/json?label=Bilibili&amp;amp;query=%24.data.follower&amp;amp;suffix=%20followers&amp;amp;url=https%3A%2F%2Fapi.bilibili.com%2Fx%2Frelation%2Fstat%3Fvmid%3D242124650&amp;amp;logo=bilibili&amp;amp;color=00A1D6&amp;amp;labelColor=FE7398&amp;amp;logoColor=FFFFFF&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;Bilibili&#34;
	
	
&gt;&lt;/a&gt;
&lt;a class=&#34;link&#34; href=&#34;https://jq.qq.com/?_wv=1027&amp;amp;k=754069680&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;&lt;img src=&#34;https://img.shields.io/badge/QQ%20%e7%be%a4-754069680-green?logo=tencent-qq&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;QQ 群&#34;
	
	
&gt;&lt;/a&gt;&lt;/p&gt;
&lt;/div&gt;
&lt;h3 id=&#34;-new-release-for-win--mac-desktop-version--welcome-to-test-and-provide-feedback&#34;&gt;📢 New Release for Win &amp;amp; Mac Desktop Version – Welcome to Test and Provide Feedback
&lt;/h3&gt;&lt;h2 id=&#34;overview&#34;&gt;Overview
&lt;/h2&gt;&lt;p&gt;Krillin AI is an all-in-one solution for effortless video localization and enhancement. This minimalist yet powerful tool handles everything from translation, dubbing to voice cloning, formatting—seamlessly converting videos between landscape and portrait modes for optimal display across all content platforms (YouTube, TikTok, Bilibili, Douyin, WeChat Channel, RedNote, Kuaishou). With its end-to-end workflow, Krillin AI transforms raw footage into polished, platform-ready content in just a few clicks.&lt;/p&gt;
&lt;h2 id=&#34;key-features&#34;&gt;Key Features:
&lt;/h2&gt;&lt;p&gt;🎯 &lt;strong&gt;One-Click Start&lt;/strong&gt; - Launch your workflow instantly. New desktop version available—easier to use!&lt;/p&gt;
&lt;p&gt;📥 &lt;strong&gt;Video download&lt;/strong&gt; - yt-dlp and local file uploading supported&lt;/p&gt;
&lt;p&gt;📜 &lt;strong&gt;Precise Subtitles&lt;/strong&gt; - Whisper-powered high-accuracy recognition&lt;/p&gt;
&lt;p&gt;🧠 &lt;strong&gt;Smart Segmentation&lt;/strong&gt; - LLM-based subtitle chunking &amp;amp; alignment&lt;/p&gt;
&lt;p&gt;🌍 &lt;strong&gt;Professional Translation&lt;/strong&gt; - Paragraph-level translation for consistency&lt;/p&gt;
&lt;p&gt;🔄 &lt;strong&gt;Term Replacement&lt;/strong&gt; - One-click domain-specific vocabulary swap&lt;/p&gt;
&lt;p&gt;🎙️ &lt;strong&gt;Dubbing and Voice Cloning&lt;/strong&gt; - CosyVoice selected or cloning voices&lt;/p&gt;
&lt;p&gt;🎬 &lt;strong&gt;Video Composition&lt;/strong&gt; - Auto-formatting for horizontal/vertical layouts&lt;/p&gt;
&lt;h2 id=&#34;showcase&#34;&gt;Showcase
&lt;/h2&gt;&lt;p&gt;The following picture demonstrates the effect after the subtitle file, which was generated through a one-click operation after importing a 46-minute local video, was inserted into the track. There was no manual adjustment involved at all. There are no missing or overlapping subtitles, the sentence segmentation is natural, and the translation quality is also quite high.&lt;/p&gt;
&lt;table&gt;
&lt;tr&gt;
&lt;td width=&#34;33%&#34;&gt;
&lt;h3 id=&#34;subtitle-translation&#34;&gt;Subtitle Translation
&lt;/h3&gt;&lt;hr&gt;
&lt;p&gt;&lt;a class=&#34;link&#34; href=&#34;https://github.com/user-attachments/assets/bba1ac0a-fe6b-4947-b58d-ba99306d0339&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;https://github.com/user-attachments/assets/bba1ac0a-fe6b-4947-b58d-ba99306d0339&lt;/a&gt;&lt;/p&gt;
&lt;/td&gt;
&lt;td width=&#34;33%&#34;&gt;
&lt;h3 id=&#34;dubbing&#34;&gt;Dubbing
&lt;/h3&gt;&lt;hr&gt;
&lt;p&gt;&lt;a class=&#34;link&#34; href=&#34;https://github.com/user-attachments/assets/0b32fad3-c3ad-4b6a-abf0-0865f0dd2385&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;https://github.com/user-attachments/assets/0b32fad3-c3ad-4b6a-abf0-0865f0dd2385&lt;/a&gt;&lt;/p&gt;
&lt;/td&gt;
&lt;td width=&#34;33%&#34;&gt;
&lt;h3 id=&#34;portrait&#34;&gt;Portrait
&lt;/h3&gt;&lt;hr&gt;
&lt;p&gt;&lt;a class=&#34;link&#34; href=&#34;https://github.com/user-attachments/assets/c2c7b528-0ef8-4ba9-b8ac-f9f92f6d4e71&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;https://github.com/user-attachments/assets/c2c7b528-0ef8-4ba9-b8ac-f9f92f6d4e71&lt;/a&gt;&lt;/p&gt;
&lt;/td&gt;
&lt;/tr&gt;
&lt;/table&gt;
&lt;h2 id=&#34;-speech-recognition-support&#34;&gt;🔍 Speech Recognition Support
&lt;/h2&gt;&lt;p&gt;&lt;em&gt;&lt;strong&gt;All local models in the table below support automatic installation of executable files + model files. Just make your selection, and KrillinAI will handle everything else for you.&lt;/strong&gt;&lt;/em&gt;&lt;/p&gt;
&lt;table&gt;
  &lt;thead&gt;
      &lt;tr&gt;
          &lt;th&gt;Service&lt;/th&gt;
          &lt;th&gt;Supported Platforms&lt;/th&gt;
          &lt;th&gt;Model Options&lt;/th&gt;
          &lt;th&gt;Local/Cloud&lt;/th&gt;
          &lt;th&gt;Notes&lt;/th&gt;
      &lt;/tr&gt;
  &lt;/thead&gt;
  &lt;tbody&gt;
      &lt;tr&gt;
          &lt;td&gt;&lt;strong&gt;OpenAI Whisper&lt;/strong&gt;&lt;/td&gt;
          &lt;td&gt;Cross-platform&lt;/td&gt;
          &lt;td&gt;-&lt;/td&gt;
          &lt;td&gt;Cloud&lt;/td&gt;
          &lt;td&gt;Fast with excellent results&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;&lt;strong&gt;FasterWhisper&lt;/strong&gt;&lt;/td&gt;
          &lt;td&gt;Windows/Linux&lt;/td&gt;
          &lt;td&gt;&lt;code&gt;tiny&lt;/code&gt;/&lt;code&gt;medium&lt;/code&gt;/&lt;code&gt;large-v2&lt;/code&gt; (recommend medium+)&lt;/td&gt;
          &lt;td&gt;Local&lt;/td&gt;
          &lt;td&gt;Faster speed, no cloud service overhead&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;&lt;strong&gt;WhisperKit&lt;/strong&gt;&lt;/td&gt;
          &lt;td&gt;macOS (Apple Silicon only)&lt;/td&gt;
          &lt;td&gt;&lt;code&gt;large-v2&lt;/code&gt;&lt;/td&gt;
          &lt;td&gt;Local&lt;/td&gt;
          &lt;td&gt;Native optimization for Apple chips&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;&lt;strong&gt;Alibaba Cloud ASR&lt;/strong&gt;&lt;/td&gt;
          &lt;td&gt;Cross-platform&lt;/td&gt;
          &lt;td&gt;-&lt;/td&gt;
          &lt;td&gt;Cloud&lt;/td&gt;
          &lt;td&gt;Bypasses China mainland network issues&lt;/td&gt;
      &lt;/tr&gt;
  &lt;/tbody&gt;
&lt;/table&gt;
&lt;h2 id=&#34;-large-language-model-support&#34;&gt;🚀 Large Language Model Support
&lt;/h2&gt;&lt;p&gt;✅ Compatible with all &lt;strong&gt;OpenAI API-compatible&lt;/strong&gt; cloud/local LLM services including but not limited to:&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;OpenAI&lt;/li&gt;
&lt;li&gt;DeepSeek&lt;/li&gt;
&lt;li&gt;Qwen (Tongyi Qianwen)&lt;/li&gt;
&lt;li&gt;Self-hosted open-source models&lt;/li&gt;
&lt;li&gt;Other OpenAI-format compatible API services&lt;/li&gt;
&lt;/ul&gt;
&lt;h2 id=&#34;-language-support&#34;&gt;🌍 Language Support
&lt;/h2&gt;&lt;p&gt;Input languages: Chinese, English, Japanese, German, Turkish supported (more languages being added)&lt;br&gt;
Translation languages: 56 languages supported, including English, Chinese, Russian, Spanish, French, etc.&lt;/p&gt;
&lt;h2 id=&#34;interface-preview&#34;&gt;Interface Preview
&lt;/h2&gt;&lt;h2 id=&#34;-quick-start&#34;&gt;🚀 Quick Start
&lt;/h2&gt;&lt;h3 id=&#34;basic-steps&#34;&gt;Basic Steps
&lt;/h3&gt;&lt;p&gt;First, download the Release executable file that matches your device&amp;rsquo;s system. Follow the instructions below to choose between the desktop or non-desktop version, then place the software in an empty folder. Running the program will generate some directories, so keeping it in an empty folder makes management easier.&lt;/p&gt;
&lt;p&gt;[For the desktop version (release files with &amp;ldquo;desktop&amp;rdquo; in the name), refer here]&lt;br&gt;
&lt;em&gt;The desktop version is newly released to address the difficulty beginners face in editing configuration files correctly. It still has some bugs and is being continuously updated.&lt;/em&gt;&lt;/p&gt;
&lt;p&gt;Double-click the file to start using it.&lt;/p&gt;
&lt;p&gt;[For the non-desktop version (release files without &amp;ldquo;desktop&amp;rdquo; in the name), refer here]&lt;br&gt;
&lt;em&gt;The non-desktop version is the original release, with more complex configuration but stable functionality. It is also suitable for server deployment, as it provides a web-based UI.&lt;/em&gt;&lt;/p&gt;
&lt;p&gt;Create a &lt;code&gt;config&lt;/code&gt; folder in the directory, then create a &lt;code&gt;config.toml&lt;/code&gt; file inside it. Copy the contents of the &lt;code&gt;config-example.toml&lt;/code&gt; file from the source code&amp;rsquo;s &lt;code&gt;config&lt;/code&gt; directory into your &lt;code&gt;config.toml&lt;/code&gt; and fill in your configuration details. (If you want to use OpenAI models but don’t know how to get a key, you can join the group for free trial access.)&lt;/p&gt;
&lt;p&gt;Double-click the executable or run it in the terminal to start the service.&lt;/p&gt;
&lt;p&gt;Open your browser and enter http://127.0.0.1:8888 to begin using it. (Replace 8888 with the port number you specified in the config file.)&lt;/p&gt;
&lt;h3 id=&#34;to-macos-users&#34;&gt;To: macOS Users
&lt;/h3&gt;&lt;p&gt;[For the desktop version, i.e., release files with &amp;ldquo;desktop&amp;rdquo; in the name, refer here]&lt;br&gt;
The current packaging method for the desktop version cannot support direct double-click execution or DMG installation due to signing issues. Manual trust configuration is required as follows:&lt;/p&gt;
&lt;ol&gt;
&lt;li&gt;
&lt;p&gt;Open the directory containing the executable file (assuming the filename is KrillinAI_1.0.0_desktop_macOS_arm64) in Terminal&lt;/p&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;Execute the following commands sequentially:&lt;/p&gt;
&lt;/li&gt;
&lt;/ol&gt;
&lt;div class=&#34;highlight&#34;&gt;&lt;div class=&#34;chroma&#34;&gt;
&lt;table class=&#34;lntable&#34;&gt;&lt;tr&gt;&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code&gt;&lt;span class=&#34;lnt&#34;&gt;1
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;2
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;3
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;
&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code class=&#34;language-fallback&#34; data-lang=&#34;fallback&#34;&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;sudo xattr -cr ./KrillinAI_1.0.0_desktop_macOS_arm64  
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;sudo chmod +x ./KrillinAI_1.0.0_desktop_macOS_arm64  
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;./KrillinAI_1.0.0_desktop_macOS_arm64  
&lt;/span&gt;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;
&lt;/div&gt;
&lt;/div&gt;&lt;p&gt;[For the non-desktop version, i.e., release files without &amp;ldquo;desktop&amp;rdquo; in the name, refer here]&lt;br&gt;
This software is not signed, so after completing the file configuration in the &amp;ldquo;Basic Steps,&amp;rdquo; you will need to manually trust the application on macOS. Follow these steps:&lt;/p&gt;
&lt;ol&gt;
&lt;li&gt;Open the terminal and navigate to the directory where the executable file (assuming the file name is &lt;code&gt;KrillinAI_1.0.0_macOS_arm64&lt;/code&gt;) is located.&lt;/li&gt;
&lt;li&gt;Execute the following commands in sequence:&lt;/li&gt;
&lt;/ol&gt;
&lt;div class=&#34;highlight&#34;&gt;&lt;div class=&#34;chroma&#34;&gt;
&lt;table class=&#34;lntable&#34;&gt;&lt;tr&gt;&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code&gt;&lt;span class=&#34;lnt&#34;&gt;1
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;2
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;3
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;
&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code class=&#34;language-fallback&#34; data-lang=&#34;fallback&#34;&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;sudo xattr -rd com.apple.quarantine ./KrillinAI_1.0.0_macOS_arm64
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;sudo chmod +x ./KrillinAI_1.0.0_macOS_arm64
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;./KrillinAI_1.0.0_macOS_arm64
&lt;/span&gt;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;
&lt;/div&gt;
&lt;/div&gt;&lt;p&gt;This will start the service.&lt;/p&gt;
&lt;h3 id=&#34;docker-deployment&#34;&gt;Docker Deployment
&lt;/h3&gt;&lt;p&gt;This project supports Docker deployment. Please refer to the &lt;a class=&#34;link&#34; href=&#34;./docs/docker.md&#34; &gt;Docker Deployment Instructions&lt;/a&gt;.&lt;/p&gt;
&lt;h3 id=&#34;cookie-configuration-instructions&#34;&gt;Cookie Configuration Instructions
&lt;/h3&gt;&lt;p&gt;If you encounter video download failures, please refer to the &lt;a class=&#34;link&#34; href=&#34;./docs/get_cookies.md&#34; &gt;Cookie Configuration Instructions&lt;/a&gt; to configure your cookie information.&lt;/p&gt;
&lt;h3 id=&#34;configuration-help&#34;&gt;Configuration Help
&lt;/h3&gt;&lt;p&gt;The quickest and most convenient configuration method:&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;Select &lt;code&gt;openai&lt;/code&gt; for both &lt;code&gt;transcription_provider&lt;/code&gt; and &lt;code&gt;llm_provider&lt;/code&gt;. In this way, you only need to fill in &lt;code&gt;openai.apikey&lt;/code&gt; in the following three major configuration item categories, namely &lt;code&gt;openai&lt;/code&gt;, &lt;code&gt;local_model&lt;/code&gt;, and &lt;code&gt;aliyun&lt;/code&gt;, and then you can conduct subtitle translation. (Fill in &lt;code&gt;app.proxy&lt;/code&gt;, &lt;code&gt;model&lt;/code&gt; and &lt;code&gt;openai.base_url&lt;/code&gt; as per your own situation.)&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;The configuration method for using the local speech recognition model (macOS is not supported for the time being) (a choice that takes into account cost, speed, and quality):&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;Fill in &lt;code&gt;fasterwhisper&lt;/code&gt; for &lt;code&gt;transcription_provider&lt;/code&gt; and &lt;code&gt;openai&lt;/code&gt; for &lt;code&gt;llm_provider&lt;/code&gt;. In this way, you only need to fill in &lt;code&gt;openai.apikey&lt;/code&gt; and &lt;code&gt;local_model.faster_whisper&lt;/code&gt; in the following three major configuration item categories, namely &lt;code&gt;openai&lt;/code&gt; and &lt;code&gt;local_model&lt;/code&gt;, and then you can conduct subtitle translation. The local model will be downloaded automatically. (The same applies to &lt;code&gt;app.proxy&lt;/code&gt; and &lt;code&gt;openai.base_url&lt;/code&gt; as mentioned above.)&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;The following usage situations require the configuration of Alibaba Cloud:&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;If &lt;code&gt;llm_provider&lt;/code&gt; is filled with &lt;code&gt;aliyun&lt;/code&gt;, it indicates that the large model service of Alibaba Cloud will be used. Consequently, the configuration of the &lt;code&gt;aliyun.bailian&lt;/code&gt; item needs to be set up.&lt;/li&gt;
&lt;li&gt;If &lt;code&gt;transcription_provider&lt;/code&gt; is filled with &lt;code&gt;aliyun&lt;/code&gt;, or if the &amp;ldquo;voice dubbing&amp;rdquo; function is enabled when starting a task, the voice service of Alibaba Cloud will be utilized. Therefore, the configuration of the &lt;code&gt;aliyun.speech&lt;/code&gt; item needs to be filled in.&lt;/li&gt;
&lt;li&gt;If the &amp;ldquo;voice dubbing&amp;rdquo; function is enabled and local audio files are uploaded for voice timbre cloning at the same time, the OSS cloud storage service of Alibaba Cloud will also be used. Hence, the configuration of the &lt;code&gt;aliyun.oss&lt;/code&gt; item needs to be filled in.
Configuration Guide: &lt;a class=&#34;link&#34; href=&#34;./docs/aliyun.md&#34; &gt;Alibaba Cloud Configuration Instructions&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;h2 id=&#34;frequently-asked-questions&#34;&gt;Frequently Asked Questions
&lt;/h2&gt;&lt;p&gt;Please refer to &lt;a class=&#34;link&#34; href=&#34;./docs/faq.md&#34; &gt;Frequently Asked Questions&lt;/a&gt;&lt;/p&gt;
&lt;h2 id=&#34;contribution-guidelines&#34;&gt;Contribution Guidelines
&lt;/h2&gt;&lt;ul&gt;
&lt;li&gt;Do not submit unnecessary files like &lt;code&gt;.vscode&lt;/code&gt;, &lt;code&gt;.idea&lt;/code&gt;, etc. Please make good use of &lt;code&gt;.gitignore&lt;/code&gt; to filter them.&lt;/li&gt;
&lt;li&gt;Do not submit &lt;code&gt;config.toml&lt;/code&gt;; instead, submit &lt;code&gt;config-example.toml&lt;/code&gt;.&lt;/li&gt;
&lt;/ul&gt;
&lt;h2 id=&#34;star-history&#34;&gt;Star History
&lt;/h2&gt;&lt;p&gt;&lt;a class=&#34;link&#34; href=&#34;https://star-history.com/#krillinai/KrillinAI&amp;amp;Date&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;&lt;img src=&#34;https://api.star-history.com/svg?repos=krillinai/KrillinAI&amp;amp;type=Date&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;Star History Chart&#34;
	
	
&gt;&lt;/a&gt;&lt;/p&gt;
</description>
        </item>
        <item>
        <title>HeyGem.ai</title>
        <link>https://producthunt.programnotes.cn/en/p/heygem.ai/</link>
        <pubDate>Tue, 15 Apr 2025 15:30:25 +0800</pubDate>
        
        <guid>https://producthunt.programnotes.cn/en/p/heygem.ai/</guid>
        <description>&lt;img src="https://images.unsplash.com/photo-1474511019749-26a5a4b632b2?ixid=M3w0NjAwMjJ8MHwxfHJhbmRvbXx8fHx8fHx8fDE3NDQ3MDIxNjZ8&amp;ixlib=rb-4.0.3" alt="Featured image of post HeyGem.ai" /&gt;&lt;h1 id=&#34;guijiaiheygemai&#34;&gt;&lt;a class=&#34;link&#34; href=&#34;https://github.com/GuijiAI/HeyGem.ai&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;GuijiAI/HeyGem.ai&lt;/a&gt;
&lt;/h1&gt;&lt;h1 id=&#34;heygem---open-source-alternative-to-heygen-切换中文&#34;&gt;Heygem - Open Source Alternative to Heygen &lt;a class=&#34;link&#34; href=&#34;./README_zh.md&#34; &gt;【切换中文】&lt;/a&gt;
&lt;/h1&gt;&lt;h2 id=&#34;announcement&#34;&gt;Announcement
&lt;/h2&gt;&lt;p&gt;Heygem digital human cloning intelligent agent and plugins have been successfully launched on the Coze platform. No complex deployment is required, even novice users can easily get started and use it directly.&lt;/p&gt;
&lt;p&gt;Click here to instantly access the Coze store experience👉&lt;a class=&#34;link&#34; href=&#34;https://www.coze.cn/store/agent/7488696243959431206?bid=6ftfk9dtg0g12&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;Silicon-based Intelligent Digital Human Cloning Agent&lt;/a&gt; | &lt;a class=&#34;link&#34; href=&#34;https://www.coze.cn/store/plugin/7488926246634782746&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;Silicon-based Intelligent Digital Human Cloning Plugin&lt;/a&gt;&lt;/p&gt;
&lt;p&gt;Scan the code to watch the operation video&lt;/p&gt;
&lt;img src=&#34;./README_zh.assets/coze-video.png&#34; width=&#34;50%&#34;&gt;
&lt;h2 id=&#34;new-ubuntu-version-notice&#34;&gt;[New Ubuntu Version Notice]
&lt;/h2&gt;&lt;p&gt;&lt;strong&gt;Ubuntu Version Officially Released&lt;/strong&gt;&lt;/p&gt;
&lt;ol&gt;
&lt;li&gt;Adaptation and verification work for Ubuntu 22.04 Desktop version (kernel 6.8.0-52-generic) has been completed. Compatibility testing for other Linux versions has not yet been conducted.&lt;/li&gt;
&lt;li&gt;Added internationalization (English) for the client program interface.&lt;/li&gt;
&lt;li&gt;Fixed some known issues
&lt;ul&gt;
&lt;li&gt;#304&lt;/li&gt;
&lt;li&gt;#292&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;https://github.com/GuijiAI/HeyGem.ai?tab=readme-ov-file#ubuntu-2204-installation&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;Ubuntu22.04 Installation Documentation&lt;/a&gt;&lt;/li&gt;
&lt;/ol&gt;
&lt;h2 id=&#34;important-notice-to-developer-partners&#34;&gt;Important Notice to Developer Partners
&lt;/h2&gt;&lt;p&gt;&lt;strong&gt;Dear Heygem Open Source Community Members:&lt;/strong&gt;&lt;/p&gt;
&lt;p&gt;We sincerely thank you for your enthusiastic attention and active participation in the Heygem digital human open source project! We have noticed that some developers face challenges during local deployment. To better meet the needs of different scenarios, we are now announcing two parallel service solutions:&lt;/p&gt;
&lt;table&gt;
  &lt;thead&gt;
      &lt;tr&gt;
          &lt;th&gt;&lt;strong&gt;Project&lt;/strong&gt;&lt;/th&gt;
          &lt;th&gt;&lt;strong&gt;HeyGem Open Source Local Deployment&lt;/strong&gt;&lt;/th&gt;
          &lt;th&gt;&lt;strong&gt;Digital Human/Clone Voice API Service&lt;/strong&gt;&lt;/th&gt;
      &lt;/tr&gt;
  &lt;/thead&gt;
  &lt;tbody&gt;
      &lt;tr&gt;
          &lt;td&gt;Usage&lt;/td&gt;
          &lt;td&gt;Open Source Local Deployment&lt;/td&gt;
          &lt;td&gt;Rapid Clone API Service&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;Recommended&lt;/td&gt;
          &lt;td&gt;Technical Users&lt;/td&gt;
          &lt;td&gt;Business Users&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;Technical Threshold&lt;/td&gt;
          &lt;td&gt;Developers with deep learning framework experience/pursuing deep customization/wishing to participate in community co-construction&lt;/td&gt;
          &lt;td&gt;Quick business integration/focus on upper-level application development/need enterprise-level SLA assurance for commercial scenarios&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;Hardware Requirements&lt;/td&gt;
          &lt;td&gt;Need to purchase GPU server&lt;/td&gt;
          &lt;td&gt;No need to purchase GPU server&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;Customization&lt;/td&gt;
          &lt;td&gt;Can modify and extend the code according to your needs, fully controlling the software&amp;rsquo;s functions and behavior&lt;/td&gt;
          &lt;td&gt;Cannot directly modify the source code, can only extend functions through API-provided interfaces, less flexible than open source projects&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;Technical Support&lt;/td&gt;
          &lt;td&gt;Community Support&lt;/td&gt;
          &lt;td&gt;Dynamic expansion support + professional technical response team&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;Maintenance Cost&lt;/td&gt;
          &lt;td&gt;High maintenance cost&lt;/td&gt;
          &lt;td&gt;Simple maintenance&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;Lip Sync Effect&lt;/td&gt;
          &lt;td&gt;Usable effect&lt;/td&gt;
          &lt;td&gt;Stunning and higher definition effect&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;Commercial Authorization&lt;/td&gt;
          &lt;td&gt;Supports global free commercial use (enterprises with more than 100,000 users or annual revenue exceeding 10 million USD need to sign a commercial license agreement)&lt;/td&gt;
          &lt;td&gt;Commercial use allowed&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;Iteration Speed&lt;/td&gt;
          &lt;td&gt;Slow updates, bug fixes depend on the community&lt;/td&gt;
          &lt;td&gt;Latest models/algorithms are prioritized, fast problem resolution&lt;/td&gt;
      &lt;/tr&gt;
  &lt;/tbody&gt;
&lt;/table&gt;
&lt;p&gt;We always adhere to the open source spirit, and the launch of the API service aims to provide a more complete solution matrix for developers with different needs. No matter which method you choose, you can always obtain technical support documents through &lt;a class=&#34;link&#34; href=&#34;mailto:James@toolwiz.com&#34; &gt;James@toolwiz.com&lt;/a&gt;. We look forward to working with you to promote the inclusive development of digital human technology!&lt;/p&gt;
&lt;p&gt;&lt;strong&gt;Silicon-based Intelligent Developer Team&lt;/strong&gt;&lt;/p&gt;
&lt;p&gt;&lt;a href=&#34;https://mp.weixin.qq.com/s/vKiBR85E7JyRkr6CxLCppA?mpshare=1&amp;scene=1&amp;srcid=0319sszkopZO6870sGsU0TFc&amp;sharer_shareinfo=cac5ec3bfa62ed558552c7c022821613&amp;sharer_shareinfo_first=cac5ec3bfa62ed558552c7c022821613&amp;from=industrynews#rd&#34; target=&#34;_blank&#34;&gt;From scratch, hand-in-hand to teach you how to create your own HeyGem open source AI digital human!&lt;/a&gt;&lt;/p&gt;
&lt;p&gt;&lt;a class=&#34;link&#34; href=&#34;https://app.guiji.cn/platform&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;&lt;strong&gt;Rapid Clone API&lt;/strong&gt;&lt;/a&gt; | &lt;a class=&#34;link&#34; href=&#34;https://guiji.cn/digital-docs/introduce/&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;&lt;strong&gt;API Documentation Center&lt;/strong&gt;&lt;/a&gt;&lt;/p&gt;
&lt;p&gt;&lt;a class=&#34;link&#34; href=&#34;https://app.guiji.cn/platform&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;&lt;strong&gt;Real-time Interaction SDK&lt;/strong&gt;&lt;/a&gt; | &lt;a class=&#34;link&#34; href=&#34;https://guiji.cn/duix-light-document/introduce/&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;&lt;strong&gt;SDK Documentation Center&lt;/strong&gt;&lt;/a&gt;&lt;/p&gt;
&lt;p&gt;&lt;a class=&#34;link&#34; href=&#34;https://github.com/GuijiAI/duix.ai&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;&lt;strong&gt;Local Real-time Interaction (realtime) duix.ai Open Source Address&lt;/strong&gt;&lt;/a&gt; |
&lt;a class=&#34;link&#34; href=&#34;https://github.com/GuijiAI/duix.ai/blob/main/duix-android/dh_aigc_android/README.md&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;&lt;strong&gt;Android Version&lt;/strong&gt;&lt;/a&gt; |
&lt;a class=&#34;link&#34; href=&#34;https://github.com/GuijiAI/duix.ai/blob/main/duix-ios/GJLocalDigitalDemo/GJLocalDigitalSDK.md&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;&lt;strong&gt;iOS Version&lt;/strong&gt;&lt;/a&gt;&lt;/p&gt;
&lt;img src=&#34;./README_zh.assets/cb10263a14cc826e22c2be4bcae01a89.jpg&#34; width=&#34;50%&#34;&gt;
&lt;h2 id=&#34;open-source-co-creation--shared-glory&#34;&gt;Open Source Co-Creation · Shared Glory
&lt;/h2&gt;&lt;p&gt;Since we open-sourced Heygem, global geeks have illuminated the digital avatar matrix in the code universe, with each commit reconstructing the future! But joy is better shared than enjoyed alone—now we invite all experts to join the &amp;ldquo;Open Source Co-Creation Plan,&amp;rdquo; empowering everyone with AI creativity and propelling the Chinese AI fleet towards the stars!&lt;/p&gt;
&lt;ol&gt;
&lt;li&gt;
&lt;p&gt;Co-Creation Content Direction&lt;/p&gt;
&lt;p&gt;Share high-quality videos or articles on Heygem deployment tutorials, optimization guides, and practical cases (Bilibili, Douyin, Xiaohongshu, WeChat Official Accounts, Zhihu, etc.)&lt;/p&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;Open Source Co-Creation Special Reward Pool (Real Cash Rewards!)&lt;/p&gt;
&lt;p&gt;(1) Basic Rewards&lt;/p&gt;
&lt;pre&gt;&lt;code&gt;Content receiving 20-100 likes will be awarded the [Heygem.ai Master Award] and a 20 RMB cash red envelope.

Content receiving 100+ likes will be awarded the [Heygem.ai God Award] and a 50 RMB cash red envelope.
&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;(2) Special Achievements:&lt;/p&gt;
&lt;pre&gt;&lt;code&gt; The monthly MVP will unlock the Open Source Hall of Fame digital badge (permanently on-chain).
&lt;/code&gt;&lt;/pre&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;Participation Method&lt;/p&gt;
&lt;p&gt;Send your work to the customer service representative; add them as a friend with the note &amp;ldquo;Name+999&amp;rdquo;.&lt;/p&gt;
&lt;/li&gt;
&lt;/ol&gt;
&lt;img src=&#34;./README_zh.assets/2025-03-20_14-38-00.jpg&#34; width=&#34;50%&#34;&gt;
&lt;h2 id=&#34;outstanding-co-creation-works-exhibition&#34;&gt;Outstanding Co-Creation Works Exhibition
&lt;/h2&gt;&lt;p&gt;&lt;a class=&#34;link&#34; href=&#34;https://www.bilibili.com/video/BV1awQqYZEqB/?spm_id_from=333.337.search-card.all.click&amp;amp;vd_source=618f44772c5dafb47317bb728505d79c&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;HeyGem Digital Human One-Click Start, 8G Video Memory Available, Model Size 10G, No Need for 100G Hard Disk Space, No Need for D Drive, Based on Docker Single Image, Silicon-Based Open Source&lt;/a&gt;&lt;/p&gt;
&lt;p&gt;&lt;a class=&#34;link&#34; href=&#34;https://www.bilibili.com/video/BV1ACQSYEErF/?spm_id_from=333.337.search-card.all.click&amp;amp;vd_source=618f44772c5dafb47317bb728505d79c&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;Ai Digital Human 16 - Local Deployment! The Most Popular Open Source Digital Human HeyGem Zero-Basis Hands-On Teaching Setup Tutorial, 20% Generation Stuck Solution, Full Simplified Process with Supporting Files - T8 ComfyUI Tutorial&lt;/a&gt;&lt;/p&gt;
&lt;p&gt;&lt;a class=&#34;link&#34; href=&#34;https://www.bilibili.com/video/BV1R3QpYsEY6/?spm_id_from=333.337.search-card.all.click&amp;amp;vd_source=618f44772c5dafb47317bb728505d79c&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;Heygem Open Source Witnessed History! Cyber Worker Revolution!&lt;/a&gt;&lt;/p&gt;
&lt;p&gt;&lt;a class=&#34;link&#34; href=&#34;https://www.bilibili.com/video/BV1eWQ6YgEcp/?spm_id_from=333.337.search-card.all.click&amp;amp;vd_source=618f44772c5dafb47317bb728505d79c&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;Digital Human Project Heygem Local Deployment Tutorial&lt;/a&gt;&lt;/p&gt;
&lt;p&gt;&lt;a class=&#34;link&#34; href=&#34;http://xhslink.com/a/rQPYqoDSRih8&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;So Tempting! From Paid to Open Source, AI Digital Humans Will Open a New Era&lt;/a&gt;&lt;/p&gt;
&lt;p&gt;&lt;a class=&#34;link&#34; href=&#34;http://xhslink.com/a/tX3p5V5tajh8&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;Open Source Free Digital Humans Are Here, Unlimited Times, Fast Cloning&lt;/a&gt;&lt;/p&gt;
&lt;p&gt;&lt;a class=&#34;link&#34; href=&#34;http://xhslink.com/a/8UT1kQ7vxjh8&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;AI Digital Humans Are Free! GitHub&amp;rsquo;s Hot Project Can Run on Your Computer&lt;/a&gt;&lt;/p&gt;
&lt;p&gt;&lt;a class=&#34;link&#34; href=&#34;https://www.bilibili.com/video/BV1SkoCYpEwh/?share_source=copy_web&amp;amp;vd_source=c38dcdb72a68f2a4e0b3c0f4f9a5a03c&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;The Most Popular Free AI Digital Human, HeyGem V1.0.3, Latest Update, One-Click Integration Package! Super Strong Lip-Sync Effect, Speed Up, Supports Long Videos, Batch Generation, 8G Video Memory Available!&lt;/a&gt;&lt;/p&gt;
&lt;p&gt;&lt;a class=&#34;link&#34; href=&#34;https://www.bilibili.com/video/BV1ZgovYGE3u/&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;&lt;strong&gt;HeyGem One-Click Package Windows Direct Run Without Docker Silicon-Based Open Source Digital Human&lt;/strong&gt;&lt;/a&gt;&lt;/p&gt;
&lt;h2 id=&#34;introduction&#34;&gt;Introduction
&lt;/h2&gt;&lt;img src=&#34;README_zh.assets/image-20250304114114272.png&#34;&gt;
&lt;p&gt;Heygem is a fully offline video synthesis tool designed for Windows systems that can precisely clone your appearance and voice, digitalizing your image. You can create videos by driving virtual avatars through text and voice. No internet connection is required, protecting your privacy while enjoying convenient and efficient digital experiences.&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;Core Features
&lt;ul&gt;
&lt;li&gt;Precise Appearance and Voice Cloning: Using advanced AI algorithms to capture human facial features with high precision, including facial features, contours, etc., to build realistic virtual models. It can also precisely clone voices, capturing and reproducing subtle characteristics of human voices, supporting various voice parameter settings to create highly similar cloning effects.&lt;/li&gt;
&lt;li&gt;Text and Voice-Driven Virtual Avatars: Understanding text content through natural language processing technology, converting text into natural and fluent speech to drive virtual avatars. Voice input can also be used directly, allowing virtual avatars to perform corresponding actions and facial expressions based on the rhythm and intonation of the voice, making the virtual avatar&amp;rsquo;s performance more natural and vivid.&lt;/li&gt;
&lt;li&gt;Efficient Video Synthesis: Highly synchronizing digital human video images with sound, achieving natural and smooth lip-syncing, intelligently optimizing audio-video synchronization effects.&lt;/li&gt;
&lt;li&gt;Multi-language Support: Scripts support eight languages - English, Japanese, Korean, Chinese, French, German, Arabic, and Spanish.&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;Key Advantages
&lt;ul&gt;
&lt;li&gt;Fully Offline Operation: No internet connection required, effectively protecting user privacy, allowing users to create in a secure, independent environment, avoiding potential data leaks during network transmission.&lt;/li&gt;
&lt;li&gt;User-Friendly: Clean and intuitive interface, easy to use even for beginners with no technical background, quickly mastering the software&amp;rsquo;s usage to start their digital human creation journey.&lt;/li&gt;
&lt;li&gt;Multiple Model Support: Supports importing multiple models and managing them through one-click startup packages, making it convenient for users to choose suitable models based on different creative needs and application scenarios.&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;Technical Support
&lt;ul&gt;
&lt;li&gt;Voice Cloning Technology: Using advanced technologies like artificial intelligence to generate similar or identical voices based on given voice samples, covering context, intonation, speed, and other aspects of speech.&lt;/li&gt;
&lt;li&gt;Automatic Speech Recognition: Technology that converts human speech vocabulary content into computer-readable input (text format), enabling computers to &amp;ldquo;understand&amp;rdquo; human speech.&lt;/li&gt;
&lt;li&gt;Computer Vision Technology: Used in video synthesis for visual processing, including facial recognition and lip movement analysis, ensuring virtual avatar lip movements match voice and text content.&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;h2 id=&#34;dependencies&#34;&gt;Dependencies
&lt;/h2&gt;&lt;ol&gt;
&lt;li&gt;Nodejs 18&lt;/li&gt;
&lt;li&gt;Docker Images
&lt;ul&gt;
&lt;li&gt;docker pull guiji2025/fun-asr&lt;/li&gt;
&lt;li&gt;docker pull guiji2025/fish-speech-ziming&lt;/li&gt;
&lt;li&gt;docker pull guiji2025/heygem.ai&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ol&gt;
&lt;h2 id=&#34;windows-installation&#34;&gt;Windows Installation
&lt;/h2&gt;&lt;h3 id=&#34;prerequisites&#34;&gt;Prerequisites
&lt;/h3&gt;&lt;ol&gt;
&lt;li&gt;
&lt;p&gt;Must have D Drive: Mainly used for storing digital human and project data&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;Free space requirement: More than 30GB&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;C Drive: Used for storing service image files&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;
&lt;p&gt;Free space requirement: More than 100GB&lt;/p&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;If less than 100GB is available, after installing Docker, you can choose a different disk folder with more than 100GB of remaining space at the location shown below.&lt;/p&gt;
&lt;p&gt;&lt;img src=&#34;https://producthunt.programnotes.cn/README_zh.assets/output.png&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;output&#34;
	
	
&gt;&lt;/p&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;System Requirements:&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;Currently supports Windows 10 19042.1526 or higher&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;Recommended Configuration:&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;CPU: 13th Gen Intel Core i5-13400F&lt;/li&gt;
&lt;li&gt;Memory: 32GB&lt;/li&gt;
&lt;li&gt;Graphics Card: RTX 4070&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;Ensure you have an NVIDIA graphics card with properly installed drivers&lt;/p&gt;
&lt;p&gt;NVIDIA driver download link: &lt;a class=&#34;link&#34; href=&#34;https://www.nvidia.cn/drivers/lookup/&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;https://www.nvidia.cn/drivers/lookup/&lt;/a&gt;&lt;/p&gt;
&lt;p&gt;&lt;img src=&#34;https://producthunt.programnotes.cn/README_zh.assets/nvidia.png&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;nvidia&#34;
	
	
&gt;&lt;/p&gt;
&lt;/li&gt;
&lt;/ol&gt;
&lt;h3 id=&#34;installing-windows-docker&#34;&gt;Installing Windows Docker
&lt;/h3&gt;&lt;ol&gt;
&lt;li&gt;
&lt;p&gt;Use the command &lt;code&gt;wsl --list --verbose&lt;/code&gt; to check if WSL is installed. If it shows as below, it&amp;rsquo;s already installed and no further installation is needed.&lt;/p&gt;
&lt;p&gt;&lt;img src=&#34;https://producthunt.programnotes.cn/README_zh.assets/wsl-list.png&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;wsl-list&#34;
	
	
&gt;&lt;/p&gt;
&lt;/li&gt;
&lt;/ol&gt;
&lt;blockquote&gt;
&lt;ul&gt;
&lt;li&gt;WSL installation command: &lt;code&gt;wsl --install&lt;/code&gt;&lt;/li&gt;
&lt;li&gt;May fail due to network issues, try multiple times&lt;/li&gt;
&lt;li&gt;During installation, you&amp;rsquo;ll need to set and remember a new username and password&lt;/li&gt;
&lt;/ul&gt;
&lt;/blockquote&gt;
&lt;ol start=&#34;2&#34;&gt;
&lt;li&gt;
&lt;p&gt;Update WSL using &lt;code&gt;wsl --update&lt;/code&gt;.&lt;/p&gt;
&lt;p&gt;&lt;img src=&#34;https://producthunt.programnotes.cn/README_zh.assets/updatewsl.png&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;updatewsl&#34;
	
	
&gt;&lt;/p&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;&lt;a class=&#34;link&#34; href=&#34;https://www.docker.com/&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;Download Docker for Windows&lt;/a&gt;, choose the appropriate installation package based on your CPU architecture.&lt;/p&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;When you see this interface, installation is successful.&lt;/p&gt;
&lt;p&gt;&lt;img src=&#34;https://producthunt.programnotes.cn/README_zh.assets/61eb4c19-3e7a-4791-a266-de4209690cbd.png&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;61eb4c19-3e7a-4791-a266-de4209690cbd&#34;
	
	
&gt;&lt;/p&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;Run Docker&lt;/p&gt;
&lt;p&gt;&lt;img src=&#34;https://producthunt.programnotes.cn/README_zh.assets/shortcut.png&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;shortcut&#34;
	
	
&gt;&lt;/p&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;Accept the agreement and skip login on first run&lt;/p&gt;
&lt;p&gt;&lt;img src=&#34;https://producthunt.programnotes.cn/README_zh.assets/accept.png&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;accept&#34;
	
	
&gt;&lt;/p&gt;
&lt;p&gt;&lt;img src=&#34;https://producthunt.programnotes.cn/README_zh.assets/576746d5-5215-4973-b1ca-c8d7409a6403.png&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;576746d5-5215-4973-b1ca-c8d7409a6403&#34;
	
	
&gt;&lt;/p&gt;
&lt;p&gt;&lt;img src=&#34;https://producthunt.programnotes.cn/README_zh.assets/9a10b7b2-1eea-48c1-b7af-34129fe04446.png&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;9a10b7b2-1eea-48c1-b7af-34129fe04446&#34;
	
	
&gt;&lt;/p&gt;
&lt;/li&gt;
&lt;/ol&gt;
&lt;h3 id=&#34;installing-the-server&#34;&gt;Installing the Server
&lt;/h3&gt;&lt;p&gt;Installation using Docker, docker-compose as follows:&lt;/p&gt;
&lt;ol&gt;
&lt;li&gt;
&lt;p&gt;The &lt;code&gt;docker-compose.yml&lt;/code&gt; file is in the &lt;code&gt;/deploy&lt;/code&gt; directory.&lt;/p&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;Execute &lt;code&gt;docker-compose up -d&lt;/code&gt; in the &lt;code&gt;/deploy&lt;/code&gt; directory, &lt;u&gt;if you want to use the lite version, execute &lt;code&gt;docker-compose -f docker-compose-lite.yml up -d&lt;/code&gt;&lt;/u&gt;&lt;/p&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;Wait patiently (about half an hour; speed depends on your network). The download will consume about 70GB of traffic, so make sure to use Wi-Fi.&lt;/p&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;When you see three services in Docker, it indicates success (the lite version has only one service &lt;code&gt;heygem-gen-video&lt;/code&gt;)&lt;/p&gt;
&lt;p&gt;&lt;img src=&#34;https://producthunt.programnotes.cn/README_zh.assets/e29d1922-7c58-46b4-b1e9-961f853f26d4.png&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;e29d1922-7c58-46b4-b1e9-961f853f26d4&#34;
	
	
&gt;&lt;/p&gt;
&lt;/li&gt;
&lt;/ol&gt;
&lt;h3 id=&#34;client&#34;&gt;Client
&lt;/h3&gt;&lt;ol&gt;
&lt;li&gt;Directly download the &lt;a class=&#34;link&#34; href=&#34;https://github.com/GuijiAI/HeyGem.ai/releases&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;officially built installation package&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;Double-click &lt;code&gt;HeyGem-x.x.x-setup.exe&lt;/code&gt; to install&lt;/li&gt;
&lt;/ol&gt;
&lt;h2 id=&#34;ubuntu-2204-installation&#34;&gt;Ubuntu 22.04 Installation
&lt;/h2&gt;&lt;h3 id=&#34;recommended-configuration&#34;&gt;Recommended Configuration
&lt;/h3&gt;&lt;ul&gt;
&lt;li&gt;CPU: 13th Gen Intel Core i5-13400F&lt;/li&gt;
&lt;li&gt;Memory: 32GB or more (required)&lt;/li&gt;
&lt;li&gt;Graphics Card: RTX-4070 (ensure you have an NVIDIA graphics card and the driver is correctly installed)&lt;/li&gt;
&lt;li&gt;Hard Disk: More than 100GB of free space&lt;/li&gt;
&lt;/ul&gt;
&lt;h3 id=&#34;install-docker&#34;&gt;Install Docker
&lt;/h3&gt;&lt;blockquote&gt;
&lt;p&gt;First, check if Docker is installed using &lt;code&gt;docker --version&lt;/code&gt;. If it is installed, skip the following steps.&lt;/p&gt;
&lt;/blockquote&gt;
&lt;ol&gt;
&lt;li&gt;Directly download the &lt;a class=&#34;link&#34; href=&#34;https://github.com/GuijiAI/HeyGem.ai/releases&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;officially built installation package&lt;/a&gt; for the Linux version&lt;/li&gt;
&lt;li&gt;Double-click &lt;code&gt;HeyGem-x.x.x.AppImage&lt;/code&gt; to launch, no installation required&lt;/li&gt;
&lt;/ol&gt;
&lt;blockquote&gt;
&lt;p&gt;Reminder: On Ubuntu systems, if you are using the &lt;code&gt;root&lt;/code&gt; user to access the desktop, double-clicking &lt;code&gt;HeyGem-x.x.x.AppImage&lt;/code&gt; may not work. You need to execute &lt;code&gt;./HeyGem-x.x.x.AppImage --no-sandbox&lt;/code&gt; in the terminal, adding the &lt;code&gt;--no-sandbox&lt;/code&gt; parameter.&lt;/p&gt;
&lt;/blockquote&gt;
&lt;h2 id=&#34;open-apis&#34;&gt;Open APIs
&lt;/h2&gt;&lt;p&gt;We have opened APIs for model training and video synthesis. After Docker starts, several ports will be exposed locally, accessible through &lt;code&gt;http://127.0.0.1&lt;/code&gt;.&lt;/p&gt;
&lt;p&gt;For specific code, refer to:&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;src/main/service/model.js&lt;/li&gt;
&lt;li&gt;src/main/service/video.js&lt;/li&gt;
&lt;li&gt;src/main/service/voice.js&lt;/li&gt;
&lt;/ul&gt;
&lt;h3 id=&#34;model-training&#34;&gt;Model Training
&lt;/h3&gt;&lt;ol&gt;
&lt;li&gt;Separate video into silent video + audio&lt;/li&gt;
&lt;li&gt;Place audio in &lt;code&gt;D:\heygem_data\voice\data&lt;/code&gt;
&lt;blockquote&gt;
&lt;p&gt;&lt;code&gt;D:\heygem_data\voice\data&lt;/code&gt; is the path agreed upon with the &lt;code&gt;guiji2025/fish-speech-ziming&lt;/code&gt; service; it can be modified in docker-compose&lt;/p&gt;
&lt;/blockquote&gt;
&lt;/li&gt;
&lt;li&gt;Call the &lt;code&gt;http://127.0.0.1:18180/v1/preprocess_and_tran&lt;/code&gt; interface
&lt;blockquote&gt;
&lt;p&gt;Parameter example:&lt;/p&gt;
&lt;div class=&#34;highlight&#34;&gt;&lt;div class=&#34;chroma&#34;&gt;
&lt;table class=&#34;lntable&#34;&gt;&lt;tr&gt;&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code&gt;&lt;span class=&#34;lnt&#34;&gt;1
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;2
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;3
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;4
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;5
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;
&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code class=&#34;language-json&#34; data-lang=&#34;json&#34;&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;&lt;span class=&#34;p&#34;&gt;{&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &lt;span class=&#34;nt&#34;&gt;&amp;#34;format&amp;#34;&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;:&lt;/span&gt; &lt;span class=&#34;s2&#34;&gt;&amp;#34;.wav&amp;#34;&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;,&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &lt;span class=&#34;nt&#34;&gt;&amp;#34;reference_audio&amp;#34;&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;:&lt;/span&gt; &lt;span class=&#34;s2&#34;&gt;&amp;#34;xxxxxx/xxxxx.wav&amp;#34;&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;,&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &lt;span class=&#34;nt&#34;&gt;&amp;#34;lang&amp;#34;&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;:&lt;/span&gt; &lt;span class=&#34;s2&#34;&gt;&amp;#34;zh&amp;#34;&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;&lt;span class=&#34;p&#34;&gt;}&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;
&lt;/div&gt;
&lt;/div&gt;&lt;p&gt;Response example:&lt;/p&gt;
&lt;div class=&#34;highlight&#34;&gt;&lt;div class=&#34;chroma&#34;&gt;
&lt;table class=&#34;lntable&#34;&gt;&lt;tr&gt;&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code&gt;&lt;span class=&#34;lnt&#34;&gt;1
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;2
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;3
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;4
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;
&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code class=&#34;language-json&#34; data-lang=&#34;json&#34;&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;&lt;span class=&#34;p&#34;&gt;{&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &lt;span class=&#34;nt&#34;&gt;&amp;#34;asr_format_audio_url&amp;#34;&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;:&lt;/span&gt; &lt;span class=&#34;s2&#34;&gt;&amp;#34;xxxx/x/xxx/xxx.wav&amp;#34;&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;,&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &lt;span class=&#34;nt&#34;&gt;&amp;#34;reference_audio_text&amp;#34;&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;:&lt;/span&gt; &lt;span class=&#34;s2&#34;&gt;&amp;#34;xxxxxxxxxxxx&amp;#34;&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;&lt;span class=&#34;p&#34;&gt;}&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;
&lt;/div&gt;
&lt;/div&gt;&lt;p&gt;&lt;strong&gt;Record the response results as they will be needed for subsequent audio synthesis&lt;/strong&gt;&lt;/p&gt;
&lt;/blockquote&gt;
&lt;/li&gt;
&lt;/ol&gt;
&lt;h3 id=&#34;audio-synthesis&#34;&gt;Audio Synthesis
&lt;/h3&gt;&lt;p&gt;Interface: &lt;code&gt;http://127.0.0.1:18180/v1/invoke&lt;/code&gt;&lt;/p&gt;
&lt;div class=&#34;highlight&#34;&gt;&lt;div class=&#34;chroma&#34;&gt;
&lt;table class=&#34;lntable&#34;&gt;&lt;tr&gt;&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code&gt;&lt;span class=&#34;lnt&#34;&gt; 1
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt; 2
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt; 3
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt; 4
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt; 5
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt; 6
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt; 7
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt; 8
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt; 9
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;10
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;11
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;12
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;13
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;14
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;15
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;16
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;17
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;
&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code class=&#34;language-json&#34; data-lang=&#34;json&#34;&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;&lt;span class=&#34;c1&#34;&gt;// Request parameters
&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;&lt;span class=&#34;p&#34;&gt;{&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &lt;span class=&#34;nt&#34;&gt;&amp;#34;speaker&amp;#34;&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;:&lt;/span&gt; &lt;span class=&#34;s2&#34;&gt;&amp;#34;{uuid}&amp;#34;&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;,&lt;/span&gt; &lt;span class=&#34;c1&#34;&gt;// A unique UUID
&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &lt;span class=&#34;nt&#34;&gt;&amp;#34;text&amp;#34;&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;:&lt;/span&gt; &lt;span class=&#34;s2&#34;&gt;&amp;#34;xxxxxxxxxx&amp;#34;&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;,&lt;/span&gt; &lt;span class=&#34;c1&#34;&gt;// Text content to synthesize
&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &lt;span class=&#34;nt&#34;&gt;&amp;#34;format&amp;#34;&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;:&lt;/span&gt; &lt;span class=&#34;s2&#34;&gt;&amp;#34;wav&amp;#34;&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;,&lt;/span&gt; &lt;span class=&#34;c1&#34;&gt;// Fixed parameter
&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &lt;span class=&#34;nt&#34;&gt;&amp;#34;topP&amp;#34;&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;:&lt;/span&gt; &lt;span class=&#34;mf&#34;&gt;0.7&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;,&lt;/span&gt; &lt;span class=&#34;c1&#34;&gt;// Fixed parameter
&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &lt;span class=&#34;nt&#34;&gt;&amp;#34;max_new_tokens&amp;#34;&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;:&lt;/span&gt; &lt;span class=&#34;mi&#34;&gt;1024&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;,&lt;/span&gt; &lt;span class=&#34;c1&#34;&gt;// Fixed parameter
&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &lt;span class=&#34;nt&#34;&gt;&amp;#34;chunk_length&amp;#34;&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;:&lt;/span&gt; &lt;span class=&#34;mi&#34;&gt;100&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;,&lt;/span&gt; &lt;span class=&#34;c1&#34;&gt;// Fixed parameter
&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &lt;span class=&#34;nt&#34;&gt;&amp;#34;repetition_penalty&amp;#34;&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;:&lt;/span&gt; &lt;span class=&#34;mf&#34;&gt;1.2&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;,&lt;/span&gt; &lt;span class=&#34;c1&#34;&gt;// Fixed parameter
&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &lt;span class=&#34;nt&#34;&gt;&amp;#34;temperature&amp;#34;&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;:&lt;/span&gt; &lt;span class=&#34;mf&#34;&gt;0.7&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;,&lt;/span&gt; &lt;span class=&#34;c1&#34;&gt;// Fixed parameter
&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &lt;span class=&#34;nt&#34;&gt;&amp;#34;need_asr&amp;#34;&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;:&lt;/span&gt; &lt;span class=&#34;kc&#34;&gt;false&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;,&lt;/span&gt; &lt;span class=&#34;c1&#34;&gt;// Fixed parameter
&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &lt;span class=&#34;nt&#34;&gt;&amp;#34;streaming&amp;#34;&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;:&lt;/span&gt; &lt;span class=&#34;kc&#34;&gt;false&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;,&lt;/span&gt; &lt;span class=&#34;c1&#34;&gt;// Fixed parameter
&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &lt;span class=&#34;nt&#34;&gt;&amp;#34;is_fixed_seed&amp;#34;&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;:&lt;/span&gt; &lt;span class=&#34;mi&#34;&gt;0&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;,&lt;/span&gt; &lt;span class=&#34;c1&#34;&gt;// Fixed parameter
&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &lt;span class=&#34;nt&#34;&gt;&amp;#34;is_norm&amp;#34;&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;:&lt;/span&gt; &lt;span class=&#34;mi&#34;&gt;0&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;,&lt;/span&gt; &lt;span class=&#34;c1&#34;&gt;// Fixed parameter
&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &lt;span class=&#34;nt&#34;&gt;&amp;#34;reference_audio&amp;#34;&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;:&lt;/span&gt; &lt;span class=&#34;s2&#34;&gt;&amp;#34;{voice.asr_format_audio_url}&amp;#34;&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;,&lt;/span&gt; &lt;span class=&#34;c1&#34;&gt;// Return value from previous &amp;#34;Model Training&amp;#34; step
&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &lt;span class=&#34;nt&#34;&gt;&amp;#34;reference_text&amp;#34;&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;:&lt;/span&gt; &lt;span class=&#34;s2&#34;&gt;&amp;#34;{voice.reference_audio_text}&amp;#34;&lt;/span&gt; &lt;span class=&#34;c1&#34;&gt;// Return value from previous &amp;#34;Model Training&amp;#34; step
&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;&lt;span class=&#34;p&#34;&gt;}&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;
&lt;/div&gt;
&lt;/div&gt;&lt;h3 id=&#34;video-synthesis&#34;&gt;Video Synthesis
&lt;/h3&gt;&lt;ul&gt;
&lt;li&gt;
&lt;p&gt;Synthesis interface: &lt;code&gt;http://127.0.0.1:8383/easy/submit&lt;/code&gt;&lt;/p&gt;
&lt;div class=&#34;highlight&#34;&gt;&lt;div class=&#34;chroma&#34;&gt;
&lt;table class=&#34;lntable&#34;&gt;&lt;tr&gt;&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code&gt;&lt;span class=&#34;lnt&#34;&gt;1
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;2
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;3
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;4
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;5
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;6
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;7
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;8
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;9
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;
&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code class=&#34;language-json&#34; data-lang=&#34;json&#34;&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;&lt;span class=&#34;c1&#34;&gt;// Request parameters
&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;&lt;span class=&#34;p&#34;&gt;{&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &lt;span class=&#34;nt&#34;&gt;&amp;#34;audio_url&amp;#34;&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;:&lt;/span&gt; &lt;span class=&#34;s2&#34;&gt;&amp;#34;{audioPath}&amp;#34;&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;,&lt;/span&gt; &lt;span class=&#34;c1&#34;&gt;// Audio path
&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &lt;span class=&#34;nt&#34;&gt;&amp;#34;video_url&amp;#34;&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;:&lt;/span&gt; &lt;span class=&#34;s2&#34;&gt;&amp;#34;{videoPath}&amp;#34;&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;,&lt;/span&gt; &lt;span class=&#34;c1&#34;&gt;// Video path
&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &lt;span class=&#34;nt&#34;&gt;&amp;#34;code&amp;#34;&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;:&lt;/span&gt; &lt;span class=&#34;s2&#34;&gt;&amp;#34;{uuid}&amp;#34;&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;,&lt;/span&gt; &lt;span class=&#34;c1&#34;&gt;// Unique key
&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &lt;span class=&#34;nt&#34;&gt;&amp;#34;chaofen&amp;#34;&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;:&lt;/span&gt; &lt;span class=&#34;mi&#34;&gt;0&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;,&lt;/span&gt; &lt;span class=&#34;c1&#34;&gt;// Fixed value
&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &lt;span class=&#34;nt&#34;&gt;&amp;#34;watermark_switch&amp;#34;&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;:&lt;/span&gt; &lt;span class=&#34;mi&#34;&gt;0&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;,&lt;/span&gt; &lt;span class=&#34;c1&#34;&gt;// Fixed value
&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &lt;span class=&#34;nt&#34;&gt;&amp;#34;pn&amp;#34;&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;:&lt;/span&gt; &lt;span class=&#34;mi&#34;&gt;1&lt;/span&gt; &lt;span class=&#34;c1&#34;&gt;// Fixed value
&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;&lt;span class=&#34;p&#34;&gt;}&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;
&lt;/div&gt;
&lt;/div&gt;&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;Progress query: &lt;code&gt;http://127.0.0.1:8383/easy/query?code=${taskCode}&lt;/code&gt;&lt;/p&gt;
&lt;blockquote&gt;
&lt;p&gt;GET request, the parameter &lt;code&gt;taskCode&lt;/code&gt; is the &lt;code&gt;code&lt;/code&gt; from the synthesis interface input above&lt;/p&gt;
&lt;/blockquote&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;h2 id=&#34;self-check-steps-before-asking-questions&#34;&gt;Self-Check Steps Before Asking Questions
&lt;/h2&gt;&lt;ol&gt;
&lt;li&gt;
&lt;p&gt;Check if all three services are in Running status&lt;/p&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;Confirm that your machine has an NVIDIA graphics card and drivers are correctly installed.&lt;/p&gt;
&lt;p&gt;All computing power for this project is local. The three services won&amp;rsquo;t start without an NVIDIA graphics card or proper drivers.&lt;/p&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;Ensure both server and client are updated to the latest version. The project is newly open-sourced, the community is very active, and updates are frequent. Your issue might have been resolved in a new version.&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;Server: Go to &lt;code&gt;/deploy&lt;/code&gt; directory and re-execute &lt;code&gt;docker-compose up -d&lt;/code&gt;&lt;/li&gt;
&lt;li&gt;Client: &lt;code&gt;pull&lt;/code&gt; the latest code and re-&lt;code&gt;build&lt;/code&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;&lt;a class=&#34;link&#34; href=&#34;https://github.com/GuijiAI/HeyGem.ai/issues&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;GitHub Issues&lt;/a&gt; are continuously updated, and issues are being resolved and closed daily. Check frequently; your issue might already be resolved.&lt;/p&gt;
&lt;/li&gt;
&lt;/ol&gt;
&lt;h2 id=&#34;question-template&#34;&gt;Question Template
&lt;/h2&gt;&lt;ol&gt;
&lt;li&gt;
&lt;p&gt;Problem Description&lt;/p&gt;
&lt;p&gt;Describe the reproduction steps in detail, with screenshots if possible.&lt;/p&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;Provide Error Logs&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;
&lt;p&gt;How to get client logs:&lt;/p&gt;
&lt;/li&gt;
&lt;li&gt;
&lt;p&gt;Server logs:&lt;/p&gt;
&lt;p&gt;Locate the relevant log entries, or click on each of our three Docker services and select &amp;ldquo;Copy&amp;rdquo;, as shown below.&lt;/p&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ol&gt;
&lt;h2 id=&#34;contact-us&#34;&gt;Contact Us
&lt;/h2&gt;&lt;div class=&#34;highlight&#34;&gt;&lt;div class=&#34;chroma&#34;&gt;
&lt;table class=&#34;lntable&#34;&gt;&lt;tr&gt;&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code&gt;&lt;span class=&#34;lnt&#34;&gt;1
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;
&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code class=&#34;language-gdscript3&#34; data-lang=&#34;gdscript3&#34;&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &lt;span class=&#34;n&#34;&gt;James&lt;/span&gt;&lt;span class=&#34;err&#34;&gt;@&lt;/span&gt;&lt;span class=&#34;n&#34;&gt;toolwiz&lt;/span&gt;&lt;span class=&#34;o&#34;&gt;.&lt;/span&gt;&lt;span class=&#34;n&#34;&gt;com&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;
&lt;/div&gt;
&lt;/div&gt;&lt;h2 id=&#34;license&#34;&gt;License
&lt;/h2&gt;&lt;p&gt;&lt;a class=&#34;link&#34; href=&#34;./LICENSE&#34; &gt;LICENSE&lt;/a&gt;&lt;/p&gt;
&lt;h2 id=&#34;acknowledgments&#34;&gt;Acknowledgments
&lt;/h2&gt;&lt;ul&gt;
&lt;li&gt;ASR based on &lt;a class=&#34;link&#34; href=&#34;https://github.com/modelscope/FunASR&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;fun-asr&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;TTS based on &lt;a class=&#34;link&#34; href=&#34;https://github.com/fishaudio/fish-speech&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;fish-speech-ziming&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;h2 id=&#34;star-history&#34;&gt;Star History
&lt;/h2&gt;&lt;p&gt;&lt;a class=&#34;link&#34; href=&#34;https://www.star-history.com/#GuijiAI/HeyGem.ai&amp;amp;Date&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;&lt;img src=&#34;https://api.star-history.com/svg?repos=GuijiAI/HeyGem.ai&amp;amp;type=Date&#34;
	
	
	
	loading=&#34;lazy&#34;
	
		alt=&#34;Star History Chart&#34;
	
	
&gt;&lt;/a&gt;&lt;/p&gt;
</description>
        </item>
        
    </channel>
</rss>
