<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:content="http://purl.org/rss/1.0/modules/content/">
  <!--
    RSS 2.0 feed for the "distributed-systems" tag listing of this site.
    Two namespace prefixes are declared on the root: "atom" (used below for the
    self link) and "content" (declared, but no element in this document uses it).
    NOTE(review): the generator element names Hugo, so this file is presumably
    build output; confirm whether fixes belong in the site templates or content
    rather than in this file.
  -->
  <channel>
    <!-- Channel metadata: human-readable title, the HTML page this feed mirrors, and a summary. -->
    <title>Distributed-Systems on Li Cao&#39;s Blog</title>
    <link>https://l1-ca0.github.io/tags/distributed-systems/</link>
    <description>Recent content in Distributed-Systems on Li Cao&#39;s Blog</description>
    <generator>Hugo -- 0.148.0</generator>
    <language>en-us</language>
    <!-- Same timestamp as the pubDate of the only item below. -->
    <lastBuildDate>Fri, 10 Apr 2026 12:00:00 +0000</lastBuildDate>
    <!-- rel="self": the URL of this feed document itself (index.xml under the tag path). -->
    <atom:link href="https://l1-ca0.github.io/tags/distributed-systems/index.xml" rel="self" type="application/rss+xml" />
    <!--
      Single entry in this feed. Its guid repeats the link URL verbatim.
      The description holds entity-escaped HTML (not child elements): one summary
      paragraph followed by a div wrapping an iframe that points at
      ds_project_proposal.pdf. The line breaks inside the description are part of
      its text content, so that element must not be re-indented or reflowed.
    -->
    <item>
      <title>Distributed Systems Project Proposal</title>
      <link>https://l1-ca0.github.io/posts/distributed-systems-project-proposal/</link>
      <pubDate>Fri, 10 Apr 2026 12:00:00 +0000</pubDate>
      <guid>https://l1-ca0.github.io/posts/distributed-systems-project-proposal/</guid>
      <description>&lt;p&gt;A high-performance inference gateway in C++ that routes client requests to a cluster of LLM serving replicas. The gateway provides KV-cache-aware routing via consistent hashing, weighted load balancing, fault tolerance with mid-stream failover and request hedging, circuit breaker for degraded replica detection, streaming token delivery, backpressure management, and zero-downtime rolling updates. Replicas participate in a SWIM gossip protocol for decentralized membership and failure detection.&lt;/p&gt;
&lt;div style=&#34;position:relative;padding-bottom:75%;height:0;overflow:hidden;&#34;&gt;
  &lt;iframe src=&#34;https://l1-ca0.github.io/ds_project_proposal.pdf&#34; style=&#34;position:absolute;top:0;left:0;width:100%;height:100%;border:0;&#34; title=&#34;Distributed Systems Project Proposal&#34;&gt;&lt;/iframe&gt;
&lt;/div&gt;</description>
    </item>
  </channel>
</rss>
