<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
    <channel>
        <title>Serving Engine on Producthunt daily</title>
        <link>https://producthunt.programnotes.cn/en/tags/serving-engine/</link>
        <description>Recent content in Serving Engine on Producthunt daily</description>
        <generator>Hugo -- gohugo.io</generator>
        <language>en</language>
        <lastBuildDate>Wed, 20 Aug 2025 15:28:48 +0800</lastBuildDate><atom:link href="https://producthunt.programnotes.cn/en/tags/serving-engine/index.xml" rel="self" type="application/rss+xml" /><item>
        <title>LMCache</title>
        <link>https://producthunt.programnotes.cn/en/p/lmcache/</link>
        <pubDate>Wed, 20 Aug 2025 15:28:48 +0800</pubDate>
        
        <guid>https://producthunt.programnotes.cn/en/p/lmcache/</guid>
        <description>&lt;img src="https://images.unsplash.com/photo-1478034460338-249ef2da6c0f?ixid=M3w0NjAwMjJ8MHwxfHJhbmRvbXx8fHx8fHx8fDE3NTU2NzQ5MDF8&amp;ixlib=rb-4.1.0" alt="Featured image of post LMCache" /&gt;&lt;h1 id=&#34;lmcachelmcache&#34;&gt;&lt;a class=&#34;link&#34; href=&#34;https://github.com/LMCache/LMCache&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;LMCache/LMCache&lt;/a&gt;
&lt;/h1&gt;&lt;div align=&#34;center&#34;&gt;
  &lt;p align=&#34;center&#34;&gt;
    &lt;img src=&#34;https://raw.githubusercontent.com/LMCache/LMCache/dev/asset/logo.png&#34; width=&#34;720&#34; alt=&#34;lmcache logo&#34;&gt;
  &lt;/p&gt;
&lt;p&gt;&lt;a class=&#34;link&#34; href=&#34;https://docs.lmcache.ai/&#34; target=&#34;_blank&#34; rel=&#34;noopener&#34;&gt;&lt;img src=&#34;https://img.shields.io/badge/docs-live-brightgreen&#34; loading=&#34;lazy&#34; alt=&#34;Docs&#34;&gt;&lt;/a&gt;
&lt;a class=&#34;link&#34; href=&#34;https://pypi.org/project/lmcache/&#34; target=&#34;_blank&#34; rel=&#34;noopener&#34;&gt;&lt;img src=&#34;https://img.shields.io/pypi/v/lmcache&#34; loading=&#34;lazy&#34; alt=&#34;PyPI&#34;&gt;&lt;/a&gt;
&lt;a class=&#34;link&#34; href=&#34;https://pypi.org/project/lmcache/&#34; target=&#34;_blank&#34; rel=&#34;noopener&#34;&gt;&lt;img src=&#34;https://img.shields.io/pypi/pyversions/lmcache&#34; loading=&#34;lazy&#34; alt=&#34;PyPI - Python Version&#34;&gt;&lt;/a&gt;
&lt;a class=&#34;link&#34; href=&#34;https://buildkite.com/lmcache/lmcache-unittests&#34; target=&#34;_blank&#34; rel=&#34;noopener&#34;&gt;&lt;img src=&#34;https://badge.buildkite.com/ce25f1819a274b7966273bfa54f0e02f092c3de0d7563c5c9d.svg&#34; loading=&#34;lazy&#34; alt=&#34;Unit Tests&#34;&gt;&lt;/a&gt;
&lt;a class=&#34;link&#34; href=&#34;https://github.com/LMCache/LMCache/actions/workflows/code_quality_checks.yml&#34; target=&#34;_blank&#34; rel=&#34;noopener&#34;&gt;&lt;img src=&#34;https://github.com/lmcache/lmcache/actions/workflows/code_quality_checks.yml/badge.svg?branch=dev&amp;amp;label=tests&#34; loading=&#34;lazy&#34; alt=&#34;Code Quality&#34;&gt;&lt;/a&gt;
&lt;a class=&#34;link&#34; href=&#34;https://buildkite.com/lmcache/lmcache-vllm-integration-tests&#34; target=&#34;_blank&#34; rel=&#34;noopener&#34;&gt;&lt;img src=&#34;https://badge.buildkite.com/108ddd4ab482a2480999dec8c62a640a3315ed4e6c4e86798e.svg&#34; loading=&#34;lazy&#34; alt=&#34;Integration Tests&#34;&gt;&lt;/a&gt;&lt;/p&gt;
&lt;br /&gt;
&lt;p&gt;&lt;a class=&#34;link&#34; href=&#34;https://www.bestpractices.dev/projects/10841&#34; target=&#34;_blank&#34; rel=&#34;noopener&#34;&gt;&lt;img src=&#34;https://www.bestpractices.dev/projects/10841/badge&#34; loading=&#34;lazy&#34; alt=&#34;OpenSSF Best Practices&#34;&gt;&lt;/a&gt;
&lt;a class=&#34;link&#34; href=&#34;https://scorecard.dev/viewer/?uri=github.com/LMCache/LMCache&#34; target=&#34;_blank&#34; rel=&#34;noopener&#34;&gt;&lt;img src=&#34;https://api.scorecard.dev/projects/github.com/LMCache/LMCache/badge&#34; loading=&#34;lazy&#34; alt=&#34;OpenSSF Scorecard&#34;&gt;&lt;/a&gt;
&lt;a class=&#34;link&#34; href=&#34;https://deepwiki.com/LMCache/LMCache/&#34; target=&#34;_blank&#34; rel=&#34;noopener&#34;&gt;&lt;img src=&#34;https://deepwiki.com/badge.svg&#34; loading=&#34;lazy&#34; alt=&#34;Ask DeepWiki&#34;&gt;&lt;/a&gt;
&lt;a class=&#34;link&#34; href=&#34;https://github.com/LMCache/LMCache/graphs/commit-activity&#34; target=&#34;_blank&#34; rel=&#34;noopener&#34;&gt;&lt;img src=&#34;https://img.shields.io/github/commit-activity/w/LMCache/LMCache&#34; loading=&#34;lazy&#34; alt=&#34;GitHub commit activity&#34;&gt;&lt;/a&gt;
&lt;a class=&#34;link&#34; href=&#34;https://pypi.org/project/lmcache/&#34; target=&#34;_blank&#34; rel=&#34;noopener&#34;&gt;&lt;img src=&#34;https://img.shields.io/pypi/dm/lmcache&#34; loading=&#34;lazy&#34; alt=&#34;PyPI - Downloads&#34;&gt;&lt;/a&gt;
&lt;a class=&#34;link&#34; href=&#34;https://www.youtube.com/channel/UC58zMz55n70rtf1Ak2PULJA&#34; target=&#34;_blank&#34; rel=&#34;noopener&#34;&gt;&lt;img src=&#34;https://img.shields.io/youtube/channel/views/UC58zMz55n70rtf1Ak2PULJA&#34; loading=&#34;lazy&#34; alt=&#34;YouTube Channel Views&#34;&gt;&lt;/a&gt;&lt;/p&gt;
&lt;/div&gt;
&lt;hr&gt;
&lt;p&gt;| &lt;a class=&#34;link&#34; href=&#34;https://blog.lmcache.ai/&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;&lt;strong&gt;Blog&lt;/strong&gt;&lt;/a&gt;
| &lt;a class=&#34;link&#34; href=&#34;https://docs.lmcache.ai/&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;&lt;strong&gt;Documentation&lt;/strong&gt;&lt;/a&gt;
| &lt;a class=&#34;link&#34; href=&#34;https://join.slack.com/t/lmcacheworkspace/shared_invite/zt-36x1m765z-8FgDA_73vcXtlZ_4XvpE6Q&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;&lt;strong&gt;Join Slack&lt;/strong&gt;&lt;/a&gt;
| &lt;a class=&#34;link&#34; href=&#34;https://forms.gle/MHwLiYDU6kcW3dLj7&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;&lt;strong&gt;Interest Form&lt;/strong&gt;&lt;/a&gt;
| &lt;a class=&#34;link&#34; href=&#34;https://github.com/LMCache/LMCache/issues/1253&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;&lt;strong&gt;Roadmap&lt;/strong&gt;&lt;/a&gt;&lt;/p&gt;
&lt;p&gt;🔥 &lt;strong&gt;NEW: For enterprise-scale deployment of LMCache and vLLM, please check out vLLM &lt;a class=&#34;link&#34; href=&#34;https://github.com/vllm-project/production-stack&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;Production Stack&lt;/a&gt;. LMCache is also officially supported in &lt;a class=&#34;link&#34; href=&#34;https://github.com/llm-d/llm-d/&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;llm-d&lt;/a&gt; and &lt;a class=&#34;link&#34; href=&#34;https://github.com/kserve/kserve&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;KServe&lt;/a&gt;!&lt;/strong&gt;&lt;/p&gt;
&lt;h2 id=&#34;summary&#34;&gt;Summary
&lt;/h2&gt;&lt;p&gt;LMCache is an &lt;strong&gt;LLM&lt;/strong&gt; serving engine extension to &lt;strong&gt;reduce TTFT&lt;/strong&gt; (time to first token) and &lt;strong&gt;increase throughput&lt;/strong&gt;, especially under long-context scenarios. By storing the KV caches of reusable texts across locations including GPU, CPU DRAM, and local disk, LMCache reuses the KV caches of &lt;strong&gt;&lt;em&gt;any&lt;/em&gt;&lt;/strong&gt; repeated text (not necessarily a prefix) in &lt;strong&gt;&lt;em&gt;any&lt;/em&gt;&lt;/strong&gt; serving engine instance. LMCache thus saves precious GPU cycles and reduces user response delay.&lt;/p&gt;
&lt;p&gt;By combining LMCache with vLLM, developers can achieve 3-10x savings in response delay and GPU cycles across many LLM use cases, including multi-round QA and RAG.&lt;/p&gt;
&lt;p&gt;&lt;img src=&#34;https://github.com/user-attachments/assets/86137f17-f216-41a0-96a7-e537764f7a4c&#34; loading=&#34;lazy&#34; alt=&#34;performance&#34;&gt;&lt;/p&gt;
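&lt;p&gt;For illustration, here is a minimal sketch of launching vLLM with LMCache as its KV-cache connector. The &lt;code&gt;--kv-transfer-config&lt;/code&gt; flag and the &lt;code&gt;LMCacheConnectorV1&lt;/code&gt; name follow the LMCache/vLLM v1 integration as documented at the time of writing, and the model name is a placeholder; see the Quickstart Examples for the exact invocation for your versions.&lt;/p&gt;
&lt;div class=&#34;highlight&#34;&gt;&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code class=&#34;language-bash&#34; data-lang=&#34;bash&#34;&gt;# Sketch: serve a model with vLLM v1, using LMCache as the KV-cache connector.
# Flag and connector names follow the LMCache docs; verify against your versions.
vllm serve meta-llama/Llama-3.1-8B-Instruct \
    --kv-transfer-config &#39;{&#34;kv_connector&#34;:&#34;LMCacheConnectorV1&#34;,&#34;kv_role&#34;:&#34;kv_both&#34;}&#39;
&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;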
&lt;h2 id=&#34;features&#34;&gt;Features
&lt;/h2&gt;&lt;ul&gt;
&lt;li&gt;&lt;input checked=&#34;&#34; disabled=&#34;&#34; type=&#34;checkbox&#34;&gt; 🔥 Integration with vLLM v1, including the following features:
&lt;ul&gt;
&lt;li&gt;High-performance CPU KVCache offloading (see the configuration sketch after this list)&lt;/li&gt;
&lt;li&gt;Disaggregated prefill&lt;/li&gt;
&lt;li&gt;P2P KVCache sharing&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;input checked=&#34;&#34; disabled=&#34;&#34; type=&#34;checkbox&#34;&gt; LMCache is supported in the &lt;a class=&#34;link&#34; href=&#34;https://github.com/vllm-project/production-stack/&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;vLLM production stack&lt;/a&gt;, &lt;a class=&#34;link&#34; href=&#34;https://github.com/llm-d/llm-d/&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;llm-d&lt;/a&gt;, and &lt;a class=&#34;link&#34; href=&#34;https://github.com/kserve/kserve&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;KServe&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;input checked=&#34;&#34; disabled=&#34;&#34; type=&#34;checkbox&#34;&gt; Stable support for non-prefix KV caches&lt;/li&gt;
&lt;li&gt;&lt;input checked=&#34;&#34; disabled=&#34;&#34; type=&#34;checkbox&#34;&gt; Support for the following storage backends:
&lt;ul&gt;
&lt;li&gt;CPU&lt;/li&gt;
&lt;li&gt;Disk&lt;/li&gt;
&lt;li&gt;&lt;a class=&#34;link&#34; href=&#34;https://github.com/ai-dynamo/nixl&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;NIXL&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;input checked=&#34;&#34; disabled=&#34;&#34; type=&#34;checkbox&#34;&gt; Installation via pip, compatible with the latest vLLM&lt;/li&gt;
&lt;/ul&gt;
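&lt;p&gt;To make the CPU offloading feature concrete, the snippet below sketches the environment-variable style of configuration used in the LMCache examples. The variable names (&lt;code&gt;LMCACHE_CHUNK_SIZE&lt;/code&gt;, &lt;code&gt;LMCACHE_LOCAL_CPU&lt;/code&gt;, &lt;code&gt;LMCACHE_MAX_LOCAL_CPU_SIZE&lt;/code&gt;) are taken from those examples and may evolve, so treat them as illustrative and confirm the current names in the docs.&lt;/p&gt;
&lt;div class=&#34;highlight&#34;&gt;&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code class=&#34;language-bash&#34; data-lang=&#34;bash&#34;&gt;# Illustrative sketch: offload KV caches to CPU DRAM before serving.
export LMCACHE_CHUNK_SIZE=256          # tokens per KV-cache chunk
export LMCACHE_LOCAL_CPU=True          # enable offloading to CPU DRAM
export LMCACHE_MAX_LOCAL_CPU_SIZE=5.0  # CPU DRAM cache budget in GiB
&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;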
&lt;h2 id=&#34;installation&#34;&gt;Installation
&lt;/h2&gt;&lt;p&gt;To use LMCache, simply install &lt;code&gt;lmcache&lt;/code&gt; with your package manager, e.g. pip:&lt;/p&gt;
&lt;div class=&#34;highlight&#34;&gt;&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code class=&#34;language-bash&#34; data-lang=&#34;bash&#34;&gt;pip install lmcache
&lt;/code&gt;&lt;/pre&gt;
&lt;/div&gt;&lt;p&gt;LMCache works on Linux with NVIDIA GPUs.&lt;/p&gt;
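&lt;p&gt;As a quick sanity check after installing (a minimal sketch, assuming a standard Python environment), you can print the installed version from package metadata:&lt;/p&gt;
&lt;div class=&#34;highlight&#34;&gt;&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code class=&#34;language-bash&#34; data-lang=&#34;bash&#34;&gt;# Print the installed lmcache version from package metadata.
python -c &#34;import importlib.metadata as m; print(m.version(&#39;lmcache&#39;))&#34;
&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;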
&lt;p&gt;More &lt;a class=&#34;link&#34; href=&#34;https://docs.lmcache.ai/getting_started/installation&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;detailed installation instructions&lt;/a&gt; are available in the docs, particularly if you are not using the latest stable version of vLLM or are using another serving engine with different dependencies. Any &amp;ldquo;undefined symbol&amp;rdquo; errors or torch version mismatches can be resolved by following the documentation.&lt;/p&gt;
&lt;h2 id=&#34;getting-started&#34;&gt;Getting started
&lt;/h2&gt;&lt;p&gt;The best way to get started is to check out the &lt;a class=&#34;link&#34; href=&#34;https://docs.lmcache.ai/getting_started/quickstart/&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;Quickstart Examples&lt;/a&gt; in the docs.&lt;/p&gt;
&lt;h2 id=&#34;documentation&#34;&gt;Documentation
&lt;/h2&gt;&lt;p&gt;Check out the LMCache &lt;a class=&#34;link&#34; href=&#34;https://docs.lmcache.ai/&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;documentation&lt;/a&gt;, which is available online.&lt;/p&gt;
&lt;p&gt;We also post regularly in &lt;a class=&#34;link&#34; href=&#34;https://blog.lmcache.ai/&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;LMCache blogs&lt;/a&gt;.&lt;/p&gt;
&lt;h2 id=&#34;examples&#34;&gt;Examples
&lt;/h2&gt;&lt;p&gt;Go hands-on with our &lt;a class=&#34;link&#34; href=&#34;https://github.com/LMCache/LMCache/tree/dev/examples&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;examples&lt;/a&gt;,
demonstrating how to address different use cases with LMCache.&lt;/p&gt;
&lt;h2 id=&#34;interested-in-connecting&#34;&gt;Interested in Connecting?
&lt;/h2&gt;&lt;p&gt;Fill out the &lt;a class=&#34;link&#34; href=&#34;https://forms.gle/mQfQDUXbKfp2St1z7&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;interest form&lt;/a&gt;, &lt;a class=&#34;link&#34; href=&#34;https://mailchi.mp/tensormesh/lmcache-sign-up-newsletter&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;sign up for our newsletter&lt;/a&gt;, &lt;a class=&#34;link&#34; href=&#34;https://join.slack.com/t/lmcacheworkspace/shared_invite/zt-2viziwhue-5Amprc9k5hcIdXT7XevTaQ&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;join LMCache slack&lt;/a&gt;, &lt;a class=&#34;link&#34; href=&#34;https://lmcache.ai/&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;check out LMCache website&lt;/a&gt;, or &lt;a class=&#34;link&#34; href=&#34;mailto:contact@lmcache.ai&#34; &gt;drop an email&lt;/a&gt;, and our team will reach out to you!&lt;/p&gt;
&lt;h2 id=&#34;community-meeting&#34;&gt;Community meeting
&lt;/h2&gt;&lt;p&gt;The &lt;a class=&#34;link&#34; href=&#34;https://uchicago.zoom.us/j/6603596916?pwd=Z1E5MDRWUSt2am5XbEt4dTFkNGx6QT09&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;community meeting&lt;/a&gt; for LMCache is hosted bi-weekly. All are welcome to join!&lt;/p&gt;
&lt;p&gt;Meetings are held every other Tuesday at 9:00 AM PT – &lt;a class=&#34;link&#34; href=&#34;https://drive.usercontent.google.com/u/0/uc?id=1f5EXbooGcwNwzIpTgn5u4PHqXgfypMtu&amp;amp;export=download&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;Add to Calendar&lt;/a&gt;&lt;/p&gt;
&lt;p&gt;We keep notes from each meeting in this &lt;a class=&#34;link&#34; href=&#34;https://docs.google.com/document/d/1_Fl3vLtERFa3vTH00cezri78NihNBtSClK-_1tSrcow&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;document&lt;/a&gt;, with summaries of standups, discussions, and action items.&lt;/p&gt;
&lt;p&gt;Recordings of meetings are available on the &lt;a class=&#34;link&#34; href=&#34;https://www.youtube.com/channel/UC58zMz55n70rtf1Ak2PULJA&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;YouTube LMCache channel&lt;/a&gt;.&lt;/p&gt;
&lt;h2 id=&#34;contributing&#34;&gt;Contributing
&lt;/h2&gt;&lt;p&gt;We welcome and value all contributions and collaborations. Please check out the &lt;a class=&#34;link&#34; href=&#34;CONTRIBUTING.md&#34; &gt;Contributing Guide&lt;/a&gt; to learn how to contribute.&lt;/p&gt;
&lt;p&gt;We continually update the onboarding issue &lt;a class=&#34;link&#34; href=&#34;https://github.com/LMCache/LMCache/issues/627&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;[Onboarding] Welcoming contributors with good first issues!&lt;/a&gt;&lt;/p&gt;
&lt;h2 id=&#34;citation&#34;&gt;Citation
&lt;/h2&gt;&lt;p&gt;If you use LMCache for your research, please cite our papers:&lt;/p&gt;
&lt;div class=&#34;highlight&#34;&gt;&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code class=&#34;language-bibtex&#34; data-lang=&#34;bibtex&#34;&gt;@inproceedings{liu2024cachegen,
  title={CacheGen: KV cache compression and streaming for fast large language model serving},
  author={Liu, Yuhan and Li, Hanchen and Cheng, Yihua and Ray, Siddhant and Huang, Yuyang and Zhang, Qizheng and Du, Kuntai and Yao, Jiayi and Lu, Shan and Ananthanarayanan, Ganesh and others},
  booktitle={Proceedings of the ACM SIGCOMM 2024 Conference},
  pages={38--56},
  year={2024}
}

@article{cheng2024large,
  title={Do Large Language Models Need a Content Delivery Network?},
  author={Cheng, Yihua and Du, Kuntai and Yao, Jiayi and Jiang, Junchen},
  journal={arXiv preprint arXiv:2409.13761},
  year={2024}
}

@inproceedings{10.1145/3689031.3696098,
  author={Yao, Jiayi and Li, Hanchen and Liu, Yuhan and Ray, Siddhant and Cheng, Yihua and Zhang, Qizheng and Du, Kuntai and Lu, Shan and Jiang, Junchen},
  title={CacheBlend: Fast Large Language Model Serving for RAG with Cached Knowledge Fusion},
  year={2025},
  url={https://doi.org/10.1145/3689031.3696098},
  doi={10.1145/3689031.3696098},
  booktitle={Proceedings of the Twentieth European Conference on Computer Systems},
  pages={94--109}
}
&lt;/code&gt;&lt;/pre&gt;
&lt;/div&gt;&lt;h2 id=&#34;socials&#34;&gt;Socials
&lt;/h2&gt;&lt;p&gt;&lt;a class=&#34;link&#34; href=&#34;https://www.linkedin.com/company/lmcache-lab/?viewAsMember=true&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;LinkedIn&lt;/a&gt; | &lt;a class=&#34;link&#34; href=&#34;https://x.com/lmcache&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;Twitter&lt;/a&gt; | &lt;a class=&#34;link&#34; href=&#34;https://www.youtube.com/@LMCacheTeam&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;YouTube&lt;/a&gt;&lt;/p&gt;
&lt;h2 id=&#34;license&#34;&gt;License
&lt;/h2&gt;&lt;p&gt;The LMCache codebase is licensed under Apache License 2.0. See the &lt;a class=&#34;link&#34; href=&#34;LICENSE&#34; &gt;LICENSE&lt;/a&gt; file for details.&lt;/p&gt;
</description>
        </item>
        
    </channel>
</rss>
