<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
    <channel>
        <title>GUI Automation on Producthunt daily</title>
        <link>https://producthunt.programnotes.cn/en/tags/gui-automation/</link>
        <description>Recent content in GUI Automation on Producthunt daily</description>
        <generator>Hugo -- gohugo.io</generator>
        <language>en</language>
        <lastBuildDate>Sun, 10 May 2026 17:00:12 +0800</lastBuildDate><atom:link href="https://producthunt.programnotes.cn/en/tags/gui-automation/index.xml" rel="self" type="application/rss+xml" /><item>
        <title>UI-TARS-desktop</title>
        <link>https://producthunt.programnotes.cn/en/p/ui-tars-desktop/</link>
        <pubDate>Sun, 10 May 2026 17:00:12 +0800</pubDate>
        
        <guid>https://producthunt.programnotes.cn/en/p/ui-tars-desktop/</guid>
        <description>&lt;img src="https://images.unsplash.com/photo-1690922515498-820b007028bb?ixid=M3w0NjAwMjJ8MHwxfHJhbmRvbXx8fHx8fHx8fDE3Nzg0MDM1OTN8&amp;ixlib=rb-4.1.0" alt="Featured image of post UI-TARS-desktop" /&gt;&lt;h1 id=&#34;bytedanceui-tars-desktop&#34;&gt;&lt;a class=&#34;link&#34; href=&#34;https://github.com/bytedance/UI-TARS-desktop&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;bytedance/UI-TARS-desktop&lt;/a&gt;
&lt;/h1&gt;&lt;picture&gt;
  &lt;img alt=&#34;Agent TARS Banner&#34; src=&#34;./images/tars.png&#34;&gt;
&lt;/picture&gt;
&lt;br/&gt;
&lt;h2 id=&#34;introduction&#34;&gt;Introduction
&lt;/h2&gt;&lt;p&gt;English | &lt;a class=&#34;link&#34; href=&#34;./README.zh-CN.md&#34; &gt;简体中文&lt;/a&gt;&lt;/p&gt;
&lt;p&gt;&lt;a class=&#34;link&#34; href=&#34;https://trendshift.io/repositories/13584&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;&lt;img src=&#34;https://trendshift.io/api/badge/repositories/13584&#34;
	
	
	
	loading=&#34;lazy&#34;
	
	
&gt;&lt;/a&gt;&lt;/p&gt;
&lt;p&gt;&lt;b&gt;TARS&lt;sup&gt;*&lt;/sup&gt;&lt;/b&gt; is a Multimodal AI Agent stack, currently shipping two projects: &lt;a class=&#34;link&#34; href=&#34;#agent-tars&#34; &gt;Agent TARS&lt;/a&gt; and &lt;a class=&#34;link&#34; href=&#34;#ui-tars-desktop&#34; &gt;UI-TARS-desktop&lt;/a&gt;:&lt;/p&gt;
&lt;table&gt;
  &lt;thead&gt;
    &lt;tr&gt;
      &lt;th width=&#34;50%&#34; align=&#34;center&#34;&gt;&lt;a href=&#34;#agent-tars&#34;&gt;Agent TARS&lt;/a&gt;&lt;/th&gt;
      &lt;th width=&#34;50%&#34; align=&#34;center&#34;&gt;&lt;a href=&#34;#ui-tars-desktop&#34;&gt;UI-TARS-desktop&lt;/a&gt;&lt;/th&gt;
    &lt;/tr&gt;
  &lt;/thead&gt;
  &lt;tbody&gt;
    &lt;tr&gt;
      &lt;td align=&#34;center&#34;&gt;
        &lt;video src=&#34;https://github.com/user-attachments/assets/c9489936-afdc-4d12-adda-d4b90d2a869d&#34; width=&#34;50%&#34;&gt;&lt;/video&gt;
      &lt;/td&gt;
      &lt;td align=&#34;center&#34;&gt;
        &lt;video src=&#34;https://github.com/user-attachments/assets/e0914ce9-ad33-494b-bdec-0c25c1b01a27&#34; width=&#34;50%&#34;&gt;&lt;/video&gt;
      &lt;/td&gt;
    &lt;/tr&gt;
    &lt;tr&gt;
      &lt;td align=&#34;left&#34;&gt;
        &lt;b&gt;Agent TARS&lt;/b&gt; is a general multimodal AI Agent stack that brings the power of GUI Agent and Vision into your terminal, computer, browser and product.
        &lt;br&gt;
        &lt;br&gt;
        It primarily ships with a &lt;a href=&#34;https://agent-tars.com/guide/basic/cli.html&#34; target=&#34;_blank&#34;&gt;CLI&lt;/a&gt; and &lt;a href=&#34;https://agent-tars.com/guide/basic/web-ui.html&#34; target=&#34;_blank&#34;&gt;Web UI&lt;/a&gt; for usage.
        It aims to provide a workflow that is closer to human-like task completion through cutting-edge multimodal LLMs and seamless integration with various real-world &lt;a href=&#34;https://agent-tars.com/guide/basic/mcp.html&#34; target=&#34;_blank&#34;&gt;MCP&lt;/a&gt; tools.
      &lt;/td&gt;
      &lt;td align=&#34;left&#34;&gt;
        &lt;b&gt;UI-TARS Desktop&lt;/b&gt; is a desktop application that provides a native GUI Agent based on the &lt;a href=&#34;https://github.com/bytedance/UI-TARS&#34; target=&#34;_blank&#34;&gt;UI-TARS&lt;/a&gt; model.
        &lt;br&gt;
        &lt;br&gt;
        It primarily ships a
        &lt;a href=&#34;https://github.com/bytedance/UI-TARS-desktop/blob/main/docs/quick-start.md#get-model-and-run-local-operator&#34; target=&#34;_blank&#34;&gt;local&lt;/a&gt; and 
        &lt;a href=&#34;https://github.com/bytedance/UI-TARS-desktop/blob/main/docs/quick-start.md#run-remote-operator&#34; target=&#34;_blank&#34;&gt;remote&lt;/a&gt; computer as well as browser operators.
      &lt;/td&gt;
    &lt;/tr&gt;
  &lt;/tbody&gt;
&lt;/table&gt;
&lt;h2 id=&#34;table-of-contents&#34;&gt;Table of Contents
&lt;/h2&gt;&lt;!-- START doctoc generated TOC please keep comment here to allow auto update --&gt;
&lt;!-- DON&#39;T EDIT THIS SECTION, INSTEAD RE-RUN doctoc TO UPDATE --&gt;
&lt;ul&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#news&#34; &gt;News&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#agent-tars&#34; &gt;Agent TARS&lt;/a&gt;
&lt;ul&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#showcase&#34; &gt;Showcase&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#core-features&#34; &gt;Core Features&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#quick-start&#34; &gt;Quick Start&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#documentation&#34; &gt;Documentation&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#ui-tars-desktop&#34; &gt;UI-TARS Desktop&lt;/a&gt;
&lt;ul&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#showcase-1&#34; &gt;Showcase&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#features&#34; &gt;Features&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#quick-start-1&#34; &gt;Quick Start&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#contributing&#34; &gt;Contributing&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#license&#34; &gt;License&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;#citation&#34; &gt;Citation&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;!-- END doctoc generated TOC please keep comment here to allow auto update --&gt;
&lt;h2 id=&#34;news&#34;&gt;News
&lt;/h2&gt;&lt;ul&gt;
&lt;li&gt;&lt;strong&gt;[2025-11-05]&lt;/strong&gt; 🎉 We&amp;rsquo;re excited to announce the release of &lt;a class=&#34;link&#34; href=&#34;https://github.com/bytedance/UI-TARS-desktop/releases/tag/v0.3.0&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;Agent TARS CLI v0.3.0&lt;/a&gt;! This version brings streaming support for multiple tools (shell commands, multi-file structured display), runtime settings with timing statistics for tool calls and deep thinking, Event Stream Viewer for data flow tracking and debugging. Additionally, it features exclusive support for &lt;a class=&#34;link&#34; href=&#34;https://github.com/agent-infra/sandbox&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;AIO agent Sandbox&lt;/a&gt; as an isolated all-in-one tool execution environment.&lt;/li&gt;
&lt;li&gt;&lt;strong&gt;[2025-06-25]&lt;/strong&gt; We released an Agent TARS Beta and Agent TARS CLI - &lt;a class=&#34;link&#34; href=&#34;https://agent-tars.com/blog/2025-06-25-introducing-agent-tars-beta.html&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;Introducing Agent TARS Beta&lt;/a&gt;, a multimodal AI agent that aims to explore a work form that is closer to human-like task completion through rich multimodal capabilities (such as GUI Agent, Vision) and seamless integration with various real-world tools.&lt;/li&gt;
&lt;li&gt;&lt;strong&gt;[2025-06-12]&lt;/strong&gt; - 🎁 We are thrilled to announce the release of UI-TARS Desktop v0.2.0! This update introduces two powerful new features: &lt;strong&gt;Remote Computer Operator&lt;/strong&gt; and &lt;strong&gt;Remote Browser Operator&lt;/strong&gt;—both completely free. No configuration required: simply click to remotely control any computer or browser, and experience a new level of convenience and intelligence.&lt;/li&gt;
&lt;li&gt;&lt;strong&gt;[2025-04-17]&lt;/strong&gt; - 🎉 We&amp;rsquo;re thrilled to announce the release of new UI-TARS Desktop application v0.1.0, featuring a redesigned Agent UI. The application enhances the computer using experience, introduces new browser operation features, and supports &lt;a class=&#34;link&#34; href=&#34;https://seed-tars.com/1.5&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;the advanced UI-TARS-1.5 model&lt;/a&gt; for improved performance and precise control.&lt;/li&gt;
&lt;li&gt;&lt;strong&gt;[2025-02-20]&lt;/strong&gt; - 📦 Introduced &lt;a class=&#34;link&#34; href=&#34;./docs/sdk.md&#34; &gt;UI TARS SDK&lt;/a&gt;, a powerful cross-platform toolkit for building GUI automation agents.&lt;/li&gt;
&lt;li&gt;&lt;strong&gt;[2025-01-23]&lt;/strong&gt; - 🚀 We updated the &lt;strong&gt;&lt;a class=&#34;link&#34; href=&#34;./docs/deployment.md#cloud-deployment&#34; &gt;Cloud Deployment&lt;/a&gt;&lt;/strong&gt; section in the 中文版: &lt;a class=&#34;link&#34; href=&#34;https://bytedance.sg.larkoffice.com/docx/TCcudYwyIox5vyxiSDLlgIsTgWf#U94rdCxzBoJMLex38NPlHL21gNb&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;GUI模型部署教程&lt;/a&gt; with new information related to the ModelScope platform. You can now use the ModelScope platform for deployment.&lt;/li&gt;
&lt;/ul&gt;
&lt;br&gt;
&lt;h2 id=&#34;agent-tars&#34;&gt;Agent TARS
&lt;/h2&gt;&lt;p&gt;
    &lt;a href=&#34;https://npmjs.com/package/@agent-tars/cli?activeTab=readme&#34;&gt;&lt;img src=&#34;https://img.shields.io/npm/v/@agent-tars/cli?style=for-the-badge&amp;colorA=1a1a2e&amp;colorB=3B82F6&amp;logo=npm&amp;logoColor=white&#34; alt=&#34;npm version&#34; /&gt;&lt;/a&gt;
    &lt;a href=&#34;https://npmcharts.com/compare/@agent-tars/cli?minimal=true&#34;&gt;&lt;img src=&#34;https://img.shields.io/npm/dm/@agent-tars/cli.svg?style=for-the-badge&amp;colorA=1a1a2e&amp;colorB=0EA5E9&amp;logo=npm&amp;logoColor=white&#34; alt=&#34;downloads&#34; /&gt;&lt;/a&gt;
    &lt;a href=&#34;https://nodejs.org/en/about/previous-releases&#34;&gt;&lt;img src=&#34;https://img.shields.io/node/v/@agent-tars/cli.svg?style=for-the-badge&amp;colorA=1a1a2e&amp;colorB=06B6D4&amp;logo=node.js&amp;logoColor=white&#34; alt=&#34;node version&#34;&gt;&lt;/a&gt;
    &lt;a href=&#34;https://discord.gg/HnKcSBgTVx&#34;&gt;&lt;img src=&#34;https://img.shields.io/badge/Discord-Join%20Community-5865F2?style=for-the-badge&amp;logo=discord&amp;logoColor=white&#34; alt=&#34;Discord Community&#34; /&gt;&lt;/a&gt;
    &lt;a href=&#34;https://twitter.com/agent_tars&#34;&gt;&lt;img src=&#34;https://img.shields.io/badge/Twitter-Follow%20%40agent__tars-1DA1F2?style=for-the-badge&amp;logo=twitter&amp;logoColor=white&#34; alt=&#34;Official Twitter&#34; /&gt;&lt;/a&gt;
    &lt;a href=&#34;https://applink.larkoffice.com/client/chat/chatter/add_by_link?link_token=deen76f4-ea3c-4964-93a3-78f126f39651&#34;&gt;&lt;img src=&#34;https://img.shields.io/badge/飞书群-加入交流群-00D4AA?style=for-the-badge&amp;logo=lark&amp;logoColor=white&#34; alt=&#34;飞书交流群&#34; /&gt;&lt;/a&gt;
    &lt;a href=&#34;https://deepwiki.com/bytedance/UI-TARS-desktop&#34;&gt;&lt;img src=&#34;https://img.shields.io/badge/DeepWiki-Ask%20AI-8B5CF6?style=for-the-badge&amp;logo=gitbook&amp;logoColor=white&#34; alt=&#34;Ask DeepWiki&#34; /&gt;&lt;/a&gt;
&lt;/p&gt;
&lt;p&gt;&lt;b&gt;Agent TARS&lt;/b&gt; is a general multimodal AI Agent stack that brings the power of GUI Agent and Vision into your terminal, computer, browser and product. &lt;br&gt; &lt;br&gt;
It primarily ships with a &lt;a href=&#34;https://agent-tars.com/guide/basic/cli.html&#34; target=&#34;_blank&#34;&gt;CLI&lt;/a&gt; and &lt;a href=&#34;https://agent-tars.com/guide/basic/web-ui.html&#34; target=&#34;_blank&#34;&gt;Web UI&lt;/a&gt; for usage.
It aims to provide a workflow that is closer to human-like task completion through cutting-edge multimodal LLMs and seamless integration with various real-world &lt;a href=&#34;https://agent-tars.com/guide/basic/mcp.html&#34; target=&#34;_blank&#34;&gt;MCP&lt;/a&gt; tools.&lt;/p&gt;
&lt;h3 id=&#34;showcase&#34;&gt;Showcase
&lt;/h3&gt;&lt;div class=&#34;highlight&#34;&gt;&lt;div class=&#34;chroma&#34;&gt;
&lt;table class=&#34;lntable&#34;&gt;&lt;tr&gt;&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code&gt;&lt;span class=&#34;lnt&#34;&gt;1
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;
&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code class=&#34;language-fallback&#34; data-lang=&#34;fallback&#34;&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;Please help me book the earliest flight from San Jose to New York on September 1st and the last return flight on September 6th on Priceline
&lt;/span&gt;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;
&lt;/div&gt;
&lt;/div&gt;&lt;p&gt;&lt;a class=&#34;link&#34; href=&#34;https://github.com/user-attachments/assets/772b0eef-aef7-4ab9-8cb0-9611820539d8&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;https://github.com/user-attachments/assets/772b0eef-aef7-4ab9-8cb0-9611820539d8&lt;/a&gt;&lt;/p&gt;
&lt;br&gt;
&lt;table&gt;
  &lt;thead&gt;
    &lt;tr&gt;
      &lt;th width=&#34;50%&#34; align=&#34;center&#34;&gt;Booking Hotel&lt;/th&gt;
      &lt;th width=&#34;50%&#34; align=&#34;center&#34;&gt;Generate Chart with extra MCP Servers&lt;/th&gt;
    &lt;/tr&gt;
  &lt;/thead&gt;
  &lt;tbody&gt;
    &lt;tr&gt;
      &lt;td align=&#34;center&#34;&gt;
        &lt;video src=&#34;https://github.com/user-attachments/assets/c9489936-afdc-4d12-adda-d4b90d2a869d&#34; width=&#34;50%&#34;&gt;&lt;/video&gt;
      &lt;/td&gt;
      &lt;td align=&#34;center&#34;&gt;
        &lt;video src=&#34;https://github.com/user-attachments/assets/a9fd72d0-01bb-4233-aa27-ca95194bbce9&#34; width=&#34;50%&#34;&gt;&lt;/video&gt;
      &lt;/td&gt;
    &lt;/tr&gt;
    &lt;tr&gt;
      &lt;td align=&#34;left&#34;&gt;
        &lt;b&gt;Instruction:&lt;/b&gt; &lt;i&gt;I am in Los Angeles from September 1st to September 6th, with a budget of $5,000. Please help me book a Ritz-Carlton hotel closest to the airport on booking.com and compile a transportation guide for me&lt;/i&gt;
      &lt;/td&gt;
      &lt;td align=&#34;left&#34;&gt;
        &lt;b&gt;Instruction:&lt;/b&gt; &lt;i&gt;Draw me a chart of Hangzhou&#39;s weather for one month&lt;/i&gt;
      &lt;/td&gt;
    &lt;/tr&gt;
  &lt;/tbody&gt;
&lt;/table&gt;
&lt;p&gt;For more use cases, please check out &lt;a class=&#34;link&#34; href=&#34;https://github.com/bytedance/UI-TARS-desktop/issues/842&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;#842&lt;/a&gt;.&lt;/p&gt;
&lt;h3 id=&#34;core-features&#34;&gt;Core Features
&lt;/h3&gt;&lt;ul&gt;
&lt;li&gt;🖱️ &lt;strong&gt;One-Click Out-of-the-box CLI&lt;/strong&gt; - Supports both &lt;strong&gt;headful&lt;/strong&gt; &lt;a class=&#34;link&#34; href=&#34;https://agent-tars.com/guide/basic/web-ui.html&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;Web UI&lt;/a&gt; and &lt;strong&gt;headless&lt;/strong&gt; &lt;a class=&#34;link&#34; href=&#34;https://agent-tars.com/guide/advanced/server.html&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;server&lt;/a&gt; &lt;a class=&#34;link&#34; href=&#34;https://agent-tars.com/guide/basic/cli.html&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;execution&lt;/a&gt;.&lt;/li&gt;
&lt;li&gt;🌐 &lt;strong&gt;Hybrid Browser Agent&lt;/strong&gt; - Control browsers using &lt;a class=&#34;link&#34; href=&#34;https://agent-tars.com/guide/basic/browser.html#visual-grounding&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;GUI Agent&lt;/a&gt;, &lt;a class=&#34;link&#34; href=&#34;https://agent-tars.com/guide/basic/browser.html#dom&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;DOM&lt;/a&gt;, or a hybrid strategy.&lt;/li&gt;
&lt;li&gt;🔄 &lt;strong&gt;Event Stream&lt;/strong&gt; - Protocol-driven Event Stream drives &lt;a class=&#34;link&#34; href=&#34;https://agent-tars.com/beta#context-engineering&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;Context Engineering&lt;/a&gt; and &lt;a class=&#34;link&#34; href=&#34;https://agent-tars.com/blog/2025-06-25-introducing-agent-tars-beta.html#easy-to-build-applications&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;Agent UI&lt;/a&gt;.&lt;/li&gt;
&lt;li&gt;🧰 &lt;strong&gt;MCP Integration&lt;/strong&gt; - The kernel is built on MCP and also supports mounting &lt;a class=&#34;link&#34; href=&#34;https://agent-tars.com/guide/basic/mcp.html&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;MCP Servers&lt;/a&gt; to connect to real-world tools.&lt;/li&gt;
&lt;/ul&gt;
&lt;h3 id=&#34;quick-start&#34;&gt;Quick Start
&lt;/h3&gt;&lt;img alt=&#34;Agent TARS CLI&#34; src=&#34;https://agent-tars.com/agent-tars-cli.png&#34;&gt;
&lt;div class=&#34;highlight&#34;&gt;&lt;div class=&#34;chroma&#34;&gt;
&lt;table class=&#34;lntable&#34;&gt;&lt;tr&gt;&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code&gt;&lt;span class=&#34;lnt&#34;&gt;1
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;2
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;3
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;4
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;5
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;6
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;7
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;8
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;9
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;
&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code class=&#34;language-bash&#34; data-lang=&#34;bash&#34;&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;&lt;span class=&#34;c1&#34;&gt;# Launch with `npx`.&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;npx @agent-tars/cli@latest
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;&lt;span class=&#34;c1&#34;&gt;# Install globally, required Node.js &amp;gt;= 22&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;npm install @agent-tars/cli@latest -g
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;&lt;span class=&#34;c1&#34;&gt;# Run with your preferred model provider&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;agent-tars --provider volcengine --model doubao-1-5-thinking-vision-pro-250428 --apiKey your-api-key
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;agent-tars --provider anthropic --model claude-3-7-sonnet-latest --apiKey your-api-key
&lt;/span&gt;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;
&lt;/div&gt;
&lt;/div&gt;&lt;p&gt;Visit the comprehensive &lt;a class=&#34;link&#34; href=&#34;https://agent-tars.com/guide/get-started/quick-start.html&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;Quick Start&lt;/a&gt; guide for detailed setup instructions.&lt;/p&gt;
&lt;h3 id=&#34;documentation&#34;&gt;Documentation
&lt;/h3&gt;&lt;blockquote&gt;
&lt;p&gt;🌟 &lt;strong&gt;Explore Agent TARS Universe&lt;/strong&gt; 🌟&lt;/p&gt;
&lt;/blockquote&gt;
&lt;table&gt;
  &lt;thead&gt;
    &lt;tr&gt;
      &lt;th width=&#34;20%&#34; align=&#34;center&#34;&gt;Category&lt;/th&gt;
      &lt;th width=&#34;30%&#34; align=&#34;center&#34;&gt;Resource Link&lt;/th&gt;
      &lt;th width=&#34;50%&#34; align=&#34;left&#34;&gt;Description&lt;/th&gt;
    &lt;/tr&gt;
  &lt;/thead&gt;
  &lt;tbody&gt;
    &lt;tr&gt;
      &lt;td align=&#34;center&#34;&gt;🏠 &lt;strong&gt;Central Hub&lt;/strong&gt;&lt;/td&gt;
      &lt;td align=&#34;center&#34;&gt;
        &lt;a href=&#34;https://agent-tars.com&#34;&gt;
          &lt;img src=&#34;https://img.shields.io/badge/Visit-Website-4F46E5?style=for-the-badge&amp;logo=globe&amp;logoColor=white&#34; alt=&#34;Website&#34; /&gt;
        &lt;/a&gt;
      &lt;/td&gt;
      &lt;td align=&#34;left&#34;&gt;Your gateway to Agent TARS ecosystem&lt;/td&gt;
    &lt;/tr&gt;
      &lt;tr&gt;
      &lt;td align=&#34;center&#34;&gt;📚 &lt;strong&gt;Quick Start&lt;/strong&gt;&lt;/td&gt;
      &lt;td align=&#34;center&#34;&gt;
        &lt;a href=&#34;https://agent-tars.com/guide/get-started/quick-start.html&#34;&gt;
          &lt;img src=&#34;https://img.shields.io/badge/Get-Started-06B6D4?style=for-the-badge&amp;logo=rocket&amp;logoColor=white&#34; alt=&#34;Quick Start&#34; /&gt;
        &lt;/a&gt;
      &lt;/td&gt;
      &lt;td align=&#34;left&#34;&gt;Zero to hero in 5 minutes&lt;/td&gt;
    &lt;/tr&gt;
    &lt;tr&gt;
      &lt;td align=&#34;center&#34;&gt;🚀 &lt;strong&gt;What&#39;s New&lt;/strong&gt;&lt;/td&gt;
      &lt;td align=&#34;center&#34;&gt;
        &lt;a href=&#34;https://agent-tars.com/beta&#34;&gt;
          &lt;img src=&#34;https://img.shields.io/badge/Read-Blog-F59E0B?style=for-the-badge&amp;logo=rss&amp;logoColor=white&#34; alt=&#34;Blog&#34; /&gt;
        &lt;/a&gt;
      &lt;/td&gt;
      &lt;td align=&#34;left&#34;&gt;Discover cutting-edge features &amp; vision&lt;/td&gt;
    &lt;/tr&gt;
    &lt;tr&gt;
      &lt;td align=&#34;center&#34;&gt;🛠️ &lt;strong&gt;Developer Zone&lt;/strong&gt;&lt;/td&gt;
      &lt;td align=&#34;center&#34;&gt;
        &lt;a href=&#34;https://agent-tars.com/guide/get-started/introduction.html&#34;&gt;
          &lt;img src=&#34;https://img.shields.io/badge/View-Docs-10B981?style=for-the-badge&amp;logo=gitbook&amp;logoColor=white&#34; alt=&#34;Docs&#34; /&gt;
        &lt;/a&gt;
      &lt;/td&gt;
      &lt;td align=&#34;left&#34;&gt;Master every command &amp; feature&lt;/td&gt;
    &lt;/tr&gt;
    &lt;tr&gt;
      &lt;td align=&#34;center&#34;&gt;🎯 &lt;strong&gt;Showcase&lt;/strong&gt;&lt;/td&gt;
      &lt;td align=&#34;center&#34;&gt;
        &lt;a href=&#34;https://github.com/bytedance/UI-TARS-desktop/issues/842&#34;&gt;
          &lt;img src=&#34;https://img.shields.io/badge/View-Examples-8B5CF6?style=for-the-badge&amp;logo=github&amp;logoColor=white&#34; alt=&#34;Examples&#34; /&gt;
        &lt;/a&gt;
      &lt;/td&gt;
      &lt;td align=&#34;left&#34;&gt;View use cases built by the official and community&lt;/td&gt;
    &lt;/tr&gt;
    &lt;tr&gt;
      &lt;td align=&#34;center&#34;&gt;🔧 &lt;strong&gt;Reference&lt;/strong&gt;&lt;/td&gt;
      &lt;td align=&#34;center&#34;&gt;
        &lt;a href=&#34;https://agent-tars.com/api/&#34;&gt;
          &lt;img src=&#34;https://img.shields.io/badge/API-Reference-EF4444?style=for-the-badge&amp;logo=book&amp;logoColor=white&#34; alt=&#34;API&#34; /&gt;
        &lt;/a&gt;
      &lt;/td&gt;
      &lt;td align=&#34;left&#34;&gt;Complete technical reference&lt;/td&gt;
    &lt;/tr&gt;
  &lt;/tbody&gt;
&lt;/table&gt;
&lt;br/&gt;
&lt;br/&gt;
&lt;br/&gt;
&lt;h2 id=&#34;ui-tars-desktop&#34;&gt;UI-TARS Desktop
&lt;/h2&gt;&lt;p align=&#34;center&#34;&gt;
  &lt;img alt=&#34;UI-TARS&#34; width=&#34;260&#34; src=&#34;./apps/ui-tars/resources/icon.png&#34;&gt;
&lt;/p&gt;
&lt;p&gt;UI-TARS Desktop is a native GUI agent for your local computer, driven by &lt;a class=&#34;link&#34; href=&#34;https://github.com/bytedance/UI-TARS&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;UI-TARS&lt;/a&gt; and Seed-1.5-VL/1.6 series models.&lt;/p&gt;
&lt;div align=&#34;center&#34;&gt;
&lt;p&gt;
        &amp;nbsp;&amp;nbsp; 📑 &lt;a href=&#34;https://arxiv.org/abs/2501.12326&#34;&gt;Paper&lt;/a&gt; &amp;nbsp;&amp;nbsp;
        | 🤗 &lt;a href=&#34;https://huggingface.co/ByteDance-Seed/UI-TARS-1.5-7B&#34;&gt;Hugging Face Models&lt;/a&gt;&amp;nbsp;&amp;nbsp;
        | &amp;nbsp;&amp;nbsp;🫨 &lt;a href=&#34;https://discord.gg/pTXwYVjfcs&#34;&gt;Discord&lt;/a&gt;&amp;nbsp;&amp;nbsp;
        | &amp;nbsp;&amp;nbsp;🤖 &lt;a href=&#34;https://www.modelscope.cn/collections/UI-TARS-bccb56fa1ef640&#34;&gt;ModelScope&lt;/a&gt;&amp;nbsp;&amp;nbsp;
&lt;br&gt;
🖥️ Desktop Application &amp;nbsp;&amp;nbsp;
| &amp;nbsp;&amp;nbsp; 👓 &lt;a href=&#34;https://github.com/web-infra-dev/midscene&#34;&gt;Midscene (use in browser)&lt;/a&gt; &amp;nbsp;&amp;nbsp;
&lt;/p&gt;
&lt;/div&gt;
&lt;h3 id=&#34;showcase-1&#34;&gt;Showcase
&lt;/h3&gt;&lt;!-- // FIXME: Choose only two demo, one local computer and one remote computer showcase. --&gt;
&lt;table&gt;
  &lt;thead&gt;
      &lt;tr&gt;
          &lt;th style=&#34;text-align: center&#34;&gt;Instruction&lt;/th&gt;
          &lt;th style=&#34;text-align: center&#34;&gt;Local Operator&lt;/th&gt;
          &lt;th style=&#34;text-align: center&#34;&gt;Remote Operator&lt;/th&gt;
      &lt;/tr&gt;
  &lt;/thead&gt;
  &lt;tbody&gt;
      &lt;tr&gt;
          &lt;td style=&#34;text-align: center&#34;&gt;Please help me open the autosave feature of VS Code and delay AutoSave operations for 500 milliseconds in the VS Code setting.&lt;/td&gt;
          &lt;td style=&#34;text-align: center&#34;&gt;&lt;video src=&#34;https://github.com/user-attachments/assets/e0914ce9-ad33-494b-bdec-0c25c1b01a27&#34; height=&#34;300&#34;&gt;&lt;/video&gt;&lt;/td&gt;
          &lt;td style=&#34;text-align: center&#34;&gt;&lt;video src=&#34;https://github.com/user-attachments/assets/01e49b69-7070-46c8-b3e3-2aaaaec71800&#34; height=&#34;300&#34;&gt;&lt;/video&gt;&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td style=&#34;text-align: center&#34;&gt;Could you help me check the latest open issue of the UI-TARS-Desktop project on GitHub?&lt;/td&gt;
          &lt;td style=&#34;text-align: center&#34;&gt;&lt;video src=&#34;https://github.com/user-attachments/assets/3d159f54-d24a-4268-96c0-e149607e9199&#34; height=&#34;300&#34;&gt;&lt;/video&gt;&lt;/td&gt;
          &lt;td style=&#34;text-align: center&#34;&gt;&lt;video src=&#34;https://github.com/user-attachments/assets/072fb72d-7394-4bfa-95f5-4736e29f7e58&#34; height=&#34;300&#34;&gt;&lt;/video&gt;&lt;/td&gt;
      &lt;/tr&gt;
  &lt;/tbody&gt;
&lt;/table&gt;
&lt;h3 id=&#34;features&#34;&gt;Features
&lt;/h3&gt;&lt;ul&gt;
&lt;li&gt;🤖 Natural language control powered by Vision-Language Model&lt;/li&gt;
&lt;li&gt;🖥️ Screenshot and visual recognition support&lt;/li&gt;
&lt;li&gt;🎯 Precise mouse and keyboard control&lt;/li&gt;
&lt;li&gt;💻 Cross-platform support (Windows/MacOS/Browser)&lt;/li&gt;
&lt;li&gt;🔄 Real-time feedback and status display&lt;/li&gt;
&lt;li&gt;🔐 Private and secure - fully local processing&lt;/li&gt;
&lt;/ul&gt;
&lt;h3 id=&#34;quick-start-1&#34;&gt;Quick Start
&lt;/h3&gt;&lt;p&gt;See &lt;a class=&#34;link&#34; href=&#34;./docs/quick-start.md&#34; &gt;Quick Start&lt;/a&gt;&lt;/p&gt;
&lt;h2 id=&#34;contributing&#34;&gt;Contributing
&lt;/h2&gt;&lt;p&gt;See &lt;a class=&#34;link&#34; href=&#34;./CONTRIBUTING.md&#34; &gt;CONTRIBUTING.md&lt;/a&gt;.&lt;/p&gt;
&lt;h2 id=&#34;license&#34;&gt;License
&lt;/h2&gt;&lt;p&gt;This project is licensed under the Apache License 2.0.&lt;/p&gt;
&lt;h2 id=&#34;citation&#34;&gt;Citation
&lt;/h2&gt;&lt;p&gt;If you find our paper and code useful in your research, please consider giving a star ⭐ and citation 📝&lt;/p&gt;
&lt;div class=&#34;highlight&#34;&gt;&lt;div class=&#34;chroma&#34;&gt;
&lt;table class=&#34;lntable&#34;&gt;&lt;tr&gt;&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code&gt;&lt;span class=&#34;lnt&#34;&gt;1
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;2
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;3
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;4
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;5
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;6
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;
&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code class=&#34;language-BibTeX&#34; data-lang=&#34;BibTeX&#34;&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;&lt;span class=&#34;nc&#34;&gt;@article&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;{&lt;/span&gt;&lt;span class=&#34;nl&#34;&gt;qin2025ui&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;,&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &lt;span class=&#34;na&#34;&gt;title&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;=&lt;/span&gt;&lt;span class=&#34;s&#34;&gt;{UI-TARS: Pioneering Automated GUI Interaction with Native Agents}&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;,&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &lt;span class=&#34;na&#34;&gt;author&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;=&lt;/span&gt;&lt;span class=&#34;s&#34;&gt;{Qin, Yujia and Ye, Yining and Fang, Junjie and Wang, Haoming and Liang, Shihao and Tian, Shizuo and Zhang, Junda and Li, Jiahao and Li, Yunxin and Huang, Shijue and others}&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;,&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &lt;span class=&#34;na&#34;&gt;journal&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;=&lt;/span&gt;&lt;span class=&#34;s&#34;&gt;{arXiv preprint arXiv:2501.12326}&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;,&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  &lt;span class=&#34;na&#34;&gt;year&lt;/span&gt;&lt;span class=&#34;p&#34;&gt;=&lt;/span&gt;&lt;span class=&#34;s&#34;&gt;{2025}&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;&lt;span class=&#34;p&#34;&gt;}&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;
&lt;/div&gt;
&lt;/div&gt;</description>
        </item>
        
    </channel>
</rss>
