[{"data":1,"prerenderedAt":2893},["ShallowReactive",2],{"content-/topics/engineering/streaming-response-realtime-search-guide":3},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":7,"title":8,"description":9,"date":10,"category":5,"tags":11,"author":17,"featured":18,"series":19,"seriesOrder":20,"readingTime":21,"image":22,"body":23,"_type":2887,"_id":2888,"_source":2889,"_file":2890,"_stem":2891,"_extension":2892},"/topics/engineering/streaming-response-realtime-search-guide","engineering",false,"","流式响应与实时搜索完全指南：把 AI Web 体验从等待改成对话","AI 产品的体验差距，很多时候不在最终答案，而在等待过程。本文从 Web 工程视角讲清流式响应、SSE、WebSocket、增量渲染、实时搜索建议、取消控制与观测方案，帮你把 AI 页面做得更像产品，而不是接口演示。","2026-03-08",[12,13,14,15,16],"流式响应","实时搜索","SSE","Web体验","AI应用","小明",true,"ai-integration-and-intelligent-applications",4,17,"/images/articles/streaming-response-realtime-search-guide-cover.jpg",{"type":24,"children":25,"toc":2825},"root",[26,34,40,45,50,70,75,94,99,104,117,122,135,140,153,194,199,203,210,215,224,229,242,249,254,259,282,287,293,298,321,326,329,340,351,362,375,385,408,413,436,449,454,472,484,496,502,507,525,530,548,554,647,650,663,676,1065,1070,1075,1098,1104,1111,1116,1122,1127,1133,1138,1141,1147,1152,1170,1175,1181,1204,1210,1446,1451,1474,1479,1497,1503,1521,1526,1529,1535,1540,1545,1568,1573,1596,1602,1776,1782,1787,1792,1795,1801,1806,1824,1829,1834,1840,1845,1850,1883,1888,1906,1911,1917,1923,1928,1934,1939,1945,1950,1956,2157,2162,2170,2173,2179,2184,2189,2207,2212,2218,2223,2241,2246,2254,2257,2263,2268,2273,2301,2307,2407,2413,2418,2441,2446,2449,2455,2461,2466,2484,2489,2507,2513,2518,2524,2529,2547,2552,2555,2561,2566,2613,2618,2658,2663,2694,2699,2730,2733,2738,2743,2793,2798,2806,2811,2819],{"type":27,"tag":28,"props":29,"children":31},"element","h1",{"id":30},"流式响应与实时搜索完全指南把-ai-web-体验从等待改成对话",[32],{"type":33,"value":8},"text",{"type":27,"tag":35,"props":36,"children":37},"p",{},[38],{"type":33,"value":39},"同样一个 AI 功能，为什么有的产品一用就觉得“挺聪明”，有的却让人感觉“像在等接口返回”？",{"type":27,"tag":35,"props":41,"children":42},{},[43],{"type":33,"value":44},"很多时候，差距不在模型本身，而在交互链路。",{"type":27,"tag":35,"props":46,"children":47},{},[48],{"type":33,"value":49},"用户并不会天然关心：",{"type":27,"tag":51,"props":52,"children":53},"ul",{},[54,60,65],{"type":27,"tag":55,"props":56,"children":57},"li",{},[58],{"type":33,"value":59},"你用的是哪个 provider",{"type":27,"tag":55,"props":61,"children":62},{},[63],{"type":33,"value":64},"你的 prompt 写得多漂亮",{"type":27,"tag":55,"props":66,"children":67},{},[68],{"type":33,"value":69},"你的服务端是不是做了 fancy 的 orchestration",{"type":27,"tag":35,"props":71,"children":72},{},[73],{"type":33,"value":74},"用户只会直接感受到三件事：",{"type":27,"tag":76,"props":77,"children":78},"ol",{},[79,84,89],{"type":27,"tag":55,"props":80,"children":81},{},[82],{"type":33,"value":83},"点完以后多久有反馈",{"type":27,"tag":55,"props":85,"children":86},{},[87],{"type":33,"value":88},"过程中是否知道系统在干什么",{"type":27,"tag":55,"props":90,"children":91},{},[92],{"type":33,"value":93},"当他改变主意时，能不能立即打断和继续",{"type":27,"tag":35,"props":95,"children":96},{},[97],{"type":33,"value":98},"这就是为什么流式响应和实时搜索体验，在 AI 产品里格外重要。",{"type":27,"tag":35,"props":100,"children":101},{},[102],{"type":33,"value":103},"一个总耗时 8 秒的回答：",{"type":27,"tag":51,"props":105,"children":106},{},[107,112],{"type":27,"tag":55,"props":108,"children":109},{},[110],{"type":33,"value":111},"如果 8 秒后一次性吐出来，用户觉得卡",{"type":27,"tag":55,"props":113,"children":114},{},[115],{"type":33,"value":116},"如果 600ms 内开始出字，用户觉得系统在思考",{"type":27,"tag":35,"props":118,"children":119},{},[120],{"type":33,"value":121},"一个搜索框：",{"type":27,"tag":51,"props":123,"children":124},{},[125,130],{"type":27,"tag":55,"props":126,"children":127},{},[128],{"type":33,"value":129},"如果每次都整页提交再等待",{"type":27,"tag":55,"props":131,"children":132},{},[133],{"type":33,"value":134},"和边输入边给建议、边更新结果",{"type":27,"tag":35,"props":136,"children":137},{},[138],{"type":33,"value":139},"体感几乎不是同一个产品。",{"type":27,"tag":35,"props":141,"children":142},{},[143,145,151],{"type":33,"value":144},"所以这篇文章不想只讲 ",{"type":27,"tag":146,"props":147,"children":149},"code",{"className":148},[],[150],{"type":33,"value":14},{"type":33,"value":152}," 怎么写，而是想把“AI Web 体验”这件事完整讲透：",{"type":27,"tag":76,"props":154,"children":155},{},[156,161,179,184,189],{"type":27,"tag":55,"props":157,"children":158},{},[159],{"type":33,"value":160},"为什么流式响应本质上是在优化感知延迟",{"type":27,"tag":55,"props":162,"children":163},{},[164,169,171,177],{"type":27,"tag":146,"props":165,"children":167},{"className":166},[],[168],{"type":33,"value":14},{"type":33,"value":170},"、",{"type":27,"tag":146,"props":172,"children":174},{"className":173},[],[175],{"type":33,"value":176},"WebSocket",{"type":33,"value":178},"、轮询分别适合什么场景",{"type":27,"tag":55,"props":180,"children":181},{},[182],{"type":33,"value":183},"怎样设计增量渲染、取消请求、错误恢复",{"type":27,"tag":55,"props":185,"children":186},{},[187],{"type":33,"value":188},"实时搜索为什么容易把后端打爆，又该怎样兜底",{"type":27,"tag":55,"props":190,"children":191},{},[192],{"type":33,"value":193},"怎样把这套体验做成可观测、可治理、可上线的系统",{"type":27,"tag":35,"props":195,"children":196},{},[197],{"type":33,"value":198},"如果你在做聊天、AI 搜索、智能问答、辅助创作，这篇会很实用。",{"type":27,"tag":200,"props":201,"children":202},"hr",{},[],{"type":27,"tag":204,"props":205,"children":207},"h2",{"id":206},"一先统一认知流式体验优化的不是总耗时而是用户心智",[208],{"type":33,"value":209},"一、先统一认知：流式体验优化的不是总耗时，而是用户心智",{"type":27,"tag":35,"props":211,"children":212},{},[213],{"type":33,"value":214},"很多工程师讨论流式输出时，最常见的误区是：",{"type":27,"tag":216,"props":217,"children":218},"blockquote",{},[219],{"type":27,"tag":35,"props":220,"children":221},{},[222],{"type":33,"value":223},"流式又不会让模型真的更快，只是把结果拆开发而已。",{"type":27,"tag":35,"props":225,"children":226},{},[227],{"type":33,"value":228},"这句话技术上没错，但产品上不够完整。",{"type":27,"tag":35,"props":230,"children":231},{},[232,234,240],{"type":33,"value":233},"因为用户体验里有一个非常关键的概念：",{"type":27,"tag":235,"props":236,"children":237},"strong",{},[238],{"type":33,"value":239},"感知等待时间",{"type":33,"value":241},"。",{"type":27,"tag":243,"props":244,"children":246},"h3",{"id":245},"_11-为什么先出一点这么值钱",[247],{"type":33,"value":248},"1.1 为什么“先出一点”这么值钱",{"type":27,"tag":35,"props":250,"children":251},{},[252],{"type":33,"value":253},"当用户点击发送后，如果界面立刻进入完全静止状态，哪怕只等 2 秒，体感也容易变差。",{"type":27,"tag":35,"props":255,"children":256},{},[257],{"type":33,"value":258},"但如果系统在 500ms 内就开始：",{"type":27,"tag":51,"props":260,"children":261},{},[262,267,272,277],{"type":27,"tag":55,"props":263,"children":264},{},[265],{"type":33,"value":266},"显示 loading 状态",{"type":27,"tag":55,"props":268,"children":269},{},[270],{"type":33,"value":271},"回显用户消息",{"type":27,"tag":55,"props":273,"children":274},{},[275],{"type":33,"value":276},"给出搜索建议",{"type":27,"tag":55,"props":278,"children":279},{},[280],{"type":33,"value":281},"逐步输出文本",{"type":27,"tag":35,"props":283,"children":284},{},[285],{"type":33,"value":286},"用户对等待的容忍度会明显提高。",{"type":27,"tag":243,"props":288,"children":290},{"id":289},"_12-流式不只是酷而是降低放弃率",[291],{"type":33,"value":292},"1.2 流式不只是“酷”，而是降低放弃率",{"type":27,"tag":35,"props":294,"children":295},{},[296],{"type":33,"value":297},"在很多 AI 产品里，流式最大的业务价值并不是炫技，而是：",{"type":27,"tag":51,"props":299,"children":300},{},[301,306,311,316],{"type":27,"tag":55,"props":302,"children":303},{},[304],{"type":33,"value":305},"提高首反馈速度感知",{"type":27,"tag":55,"props":307,"children":308},{},[309],{"type":33,"value":310},"降低用户重复点击",{"type":27,"tag":55,"props":312,"children":313},{},[314],{"type":33,"value":315},"降低“系统是不是卡了”的不确定性",{"type":27,"tag":55,"props":317,"children":318},{},[319],{"type":33,"value":320},"让长回答看起来不是“死等”",{"type":27,"tag":35,"props":322,"children":323},{},[324],{"type":33,"value":325},"这就是为什么一个真正成熟的 AI 前端，几乎都会认真设计流式链路。",{"type":27,"tag":200,"props":327,"children":328},{},[],{"type":27,"tag":204,"props":330,"children":332},{"id":331},"二三种常见实时传输方式别一上来就默认-websocket",[333,335],{"type":33,"value":334},"二、三种常见实时传输方式，别一上来就默认 ",{"type":27,"tag":146,"props":336,"children":338},{"className":337},[],[339],{"type":33,"value":176},{"type":27,"tag":35,"props":341,"children":342},{},[343,345,350],{"type":33,"value":344},"很多团队一听“实时”，第一反应就是 ",{"type":27,"tag":146,"props":346,"children":348},{"className":347},[],[349],{"type":33,"value":176},{"type":33,"value":241},{"type":27,"tag":35,"props":352,"children":353},{},[354,356,361],{"type":33,"value":355},"但真实工程里，AI 文本生成场景最常用的反而是 ",{"type":27,"tag":146,"props":357,"children":359},{"className":358},[],[360],{"type":33,"value":14},{"type":33,"value":241},{"type":27,"tag":243,"props":363,"children":365},{"id":364},"_21-sse最适合单向增量输出",[366,368,373],{"type":33,"value":367},"2.1 ",{"type":27,"tag":146,"props":369,"children":371},{"className":370},[],[372],{"type":33,"value":14},{"type":33,"value":374},"：最适合单向增量输出",{"type":27,"tag":35,"props":376,"children":377},{},[378,383],{"type":27,"tag":146,"props":379,"children":381},{"className":380},[],[382],{"type":33,"value":14},{"type":33,"value":384},"（Server-Sent Events）的特点是：",{"type":27,"tag":51,"props":386,"children":387},{},[388,393,398,403],{"type":27,"tag":55,"props":389,"children":390},{},[391],{"type":33,"value":392},"服务端持续向客户端推送数据",{"type":27,"tag":55,"props":394,"children":395},{},[396],{"type":33,"value":397},"协议基于普通 HTTP",{"type":27,"tag":55,"props":399,"children":400},{},[401],{"type":33,"value":402},"实现相对简单",{"type":27,"tag":55,"props":404,"children":405},{},[406],{"type":33,"value":407},"天然适合“服务端不断出字，客户端不断展示”",{"type":27,"tag":35,"props":409,"children":410},{},[411],{"type":33,"value":412},"它特别适合：",{"type":27,"tag":51,"props":414,"children":415},{},[416,421,426,431],{"type":27,"tag":55,"props":417,"children":418},{},[419],{"type":33,"value":420},"聊天回答流式输出",{"type":27,"tag":55,"props":422,"children":423},{},[424],{"type":33,"value":425},"摘要生成",{"type":27,"tag":55,"props":427,"children":428},{},[429],{"type":33,"value":430},"文本改写",{"type":27,"tag":55,"props":432,"children":433},{},[434],{"type":33,"value":435},"搜索结果增量返回",{"type":27,"tag":243,"props":437,"children":439},{"id":438},"_22-websocket适合双向高频互动",[440,442,447],{"type":33,"value":441},"2.2 ",{"type":27,"tag":146,"props":443,"children":445},{"className":444},[],[446],{"type":33,"value":176},{"type":33,"value":448},"：适合双向高频互动",{"type":27,"tag":35,"props":450,"children":451},{},[452],{"type":33,"value":453},"如果你的场景需要：",{"type":27,"tag":51,"props":455,"children":456},{},[457,462,467],{"type":27,"tag":55,"props":458,"children":459},{},[460],{"type":33,"value":461},"双向实时协作",{"type":27,"tag":55,"props":463,"children":464},{},[465],{"type":33,"value":466},"高频事件交互",{"type":27,"tag":55,"props":468,"children":469},{},[470],{"type":33,"value":471},"客户端不断回传控制信号",{"type":27,"tag":35,"props":473,"children":474},{},[475,477,482],{"type":33,"value":476},"那 ",{"type":27,"tag":146,"props":478,"children":480},{"className":479},[],[481],{"type":33,"value":176},{"type":33,"value":483}," 更合适。",{"type":27,"tag":35,"props":485,"children":486},{},[487,489,494],{"type":33,"value":488},"但如果只是单向输出文本，用 ",{"type":27,"tag":146,"props":490,"children":492},{"className":491},[],[493],{"type":33,"value":176},{"type":33,"value":495}," 往往复杂度更高、收益不一定更高。",{"type":27,"tag":243,"props":497,"children":499},{"id":498},"_23-轮询和长轮询不是不能用但通常不够优雅",[500],{"type":33,"value":501},"2.3 轮询和长轮询：不是不能用，但通常不够优雅",{"type":27,"tag":35,"props":503,"children":504},{},[505],{"type":33,"value":506},"轮询更适合：",{"type":27,"tag":51,"props":508,"children":509},{},[510,515,520],{"type":27,"tag":55,"props":511,"children":512},{},[513],{"type":33,"value":514},"兼容旧系统",{"type":27,"tag":55,"props":516,"children":517},{},[518],{"type":33,"value":519},"实时性要求没那么高",{"type":27,"tag":55,"props":521,"children":522},{},[523],{"type":33,"value":524},"开发和部署环境有限",{"type":27,"tag":35,"props":526,"children":527},{},[528],{"type":33,"value":529},"但在 AI 文本交互里，轮询会天然带来：",{"type":27,"tag":51,"props":531,"children":532},{},[533,538,543],{"type":27,"tag":55,"props":534,"children":535},{},[536],{"type":33,"value":537},"空请求浪费",{"type":27,"tag":55,"props":539,"children":540},{},[541],{"type":33,"value":542},"延迟抖动",{"type":27,"tag":55,"props":544,"children":545},{},[546],{"type":33,"value":547},"体验断裂",{"type":27,"tag":243,"props":549,"children":551},{"id":550},"_24-一个简单判断表",[552],{"type":33,"value":553},"2.4 一个简单判断表",{"type":27,"tag":555,"props":556,"children":557},"table",{},[558,582],{"type":27,"tag":559,"props":560,"children":561},"thead",{},[562],{"type":27,"tag":563,"props":564,"children":565},"tr",{},[566,572,577],{"type":27,"tag":567,"props":568,"children":569},"th",{},[570],{"type":33,"value":571},"方案",{"type":27,"tag":567,"props":573,"children":574},{},[575],{"type":33,"value":576},"适合场景",{"type":27,"tag":567,"props":578,"children":579},{},[580],{"type":33,"value":581},"不适合场景",{"type":27,"tag":583,"props":584,"children":585},"tbody",{},[586,608,629],{"type":27,"tag":563,"props":587,"children":588},{},[589,598,603],{"type":27,"tag":590,"props":591,"children":592},"td",{},[593],{"type":27,"tag":146,"props":594,"children":596},{"className":595},[],[597],{"type":33,"value":14},{"type":27,"tag":590,"props":599,"children":600},{},[601],{"type":33,"value":602},"单向流式文本、增量结果",{"type":27,"tag":590,"props":604,"children":605},{},[606],{"type":33,"value":607},"强双向互动",{"type":27,"tag":563,"props":609,"children":610},{},[611,619,624],{"type":27,"tag":590,"props":612,"children":613},{},[614],{"type":27,"tag":146,"props":615,"children":617},{"className":616},[],[618],{"type":33,"value":176},{"type":27,"tag":590,"props":620,"children":621},{},[622],{"type":33,"value":623},"协作、双向状态同步",{"type":27,"tag":590,"props":625,"children":626},{},[627],{"type":33,"value":628},"只做简单文本流",{"type":27,"tag":563,"props":630,"children":631},{},[632,637,642],{"type":27,"tag":590,"props":633,"children":634},{},[635],{"type":33,"value":636},"轮询",{"type":27,"tag":590,"props":638,"children":639},{},[640],{"type":33,"value":641},"低实时要求、兼容方案",{"type":27,"tag":590,"props":643,"children":644},{},[645],{"type":33,"value":646},"高体验 AI 对话",{"type":27,"tag":200,"props":648,"children":649},{},[],{"type":27,"tag":204,"props":651,"children":653},{"id":652},"三sse-落地时真正难的不是写出来而是把整条链路打通",[654,656,661],{"type":33,"value":655},"三、",{"type":27,"tag":146,"props":657,"children":659},{"className":658},[],[660],{"type":33,"value":14},{"type":33,"value":662}," 落地时，真正难的不是写出来，而是把整条链路打通",{"type":27,"tag":243,"props":664,"children":666},{"id":665},"_31-一个最小-sse-示例",[667,669,674],{"type":33,"value":668},"3.1 一个最小 ",{"type":27,"tag":146,"props":670,"children":672},{"className":671},[],[673],{"type":33,"value":14},{"type":33,"value":675}," 示例",{"type":27,"tag":677,"props":678,"children":682},"pre",{"className":679,"code":680,"language":681,"meta":7,"style":7},"language-ts shiki shiki-themes github-dark","export async function streamAnswer(req, res) {\n  res.setHeader('Content-Type', 'text/event-stream; charset=utf-8')\n  res.setHeader('Cache-Control', 'no-cache, no-transform')\n  res.setHeader('Connection', 'keep-alive')\n\n  for await (const chunk of llm.stream(req.body.messages)) {\n    res.write(`data: ${JSON.stringify({ delta: chunk })}\\n\\n`)\n  }\n\n  res.write('event: done\\ndata: {}\\n\\n')\n  res.end()\n}\n","ts",[683],{"type":27,"tag":146,"props":684,"children":685},{"__ignoreMap":7},[686,741,779,813,846,855,905,977,986,994,1038,1056],{"type":27,"tag":687,"props":688,"children":691},"span",{"class":689,"line":690},"line",1,[692,698,703,708,714,720,726,731,736],{"type":27,"tag":687,"props":693,"children":695},{"style":694},"--shiki-default:#F97583",[696],{"type":33,"value":697},"export",{"type":27,"tag":687,"props":699,"children":700},{"style":694},[701],{"type":33,"value":702}," async",{"type":27,"tag":687,"props":704,"children":705},{"style":694},[706],{"type":33,"value":707}," function",{"type":27,"tag":687,"props":709,"children":711},{"style":710},"--shiki-default:#B392F0",[712],{"type":33,"value":713}," streamAnswer",{"type":27,"tag":687,"props":715,"children":717},{"style":716},"--shiki-default:#E1E4E8",[718],{"type":33,"value":719},"(",{"type":27,"tag":687,"props":721,"children":723},{"style":722},"--shiki-default:#FFAB70",[724],{"type":33,"value":725},"req",{"type":27,"tag":687,"props":727,"children":728},{"style":716},[729],{"type":33,"value":730},", ",{"type":27,"tag":687,"props":732,"children":733},{"style":722},[734],{"type":33,"value":735},"res",{"type":27,"tag":687,"props":737,"children":738},{"style":716},[739],{"type":33,"value":740},") {\n",{"type":27,"tag":687,"props":742,"children":744},{"class":689,"line":743},2,[745,750,755,759,765,769,774],{"type":27,"tag":687,"props":746,"children":747},{"style":716},[748],{"type":33,"value":749},"  res.",{"type":27,"tag":687,"props":751,"children":752},{"style":710},[753],{"type":33,"value":754},"setHeader",{"type":27,"tag":687,"props":756,"children":757},{"style":716},[758],{"type":33,"value":719},{"type":27,"tag":687,"props":760,"children":762},{"style":761},"--shiki-default:#9ECBFF",[763],{"type":33,"value":764},"'Content-Type'",{"type":27,"tag":687,"props":766,"children":767},{"style":716},[768],{"type":33,"value":730},{"type":27,"tag":687,"props":770,"children":771},{"style":761},[772],{"type":33,"value":773},"'text/event-stream; charset=utf-8'",{"type":27,"tag":687,"props":775,"children":776},{"style":716},[777],{"type":33,"value":778},")\n",{"type":27,"tag":687,"props":780,"children":782},{"class":689,"line":781},3,[783,787,791,795,800,804,809],{"type":27,"tag":687,"props":784,"children":785},{"style":716},[786],{"type":33,"value":749},{"type":27,"tag":687,"props":788,"children":789},{"style":710},[790],{"type":33,"value":754},{"type":27,"tag":687,"props":792,"children":793},{"style":716},[794],{"type":33,"value":719},{"type":27,"tag":687,"props":796,"children":797},{"style":761},[798],{"type":33,"value":799},"'Cache-Control'",{"type":27,"tag":687,"props":801,"children":802},{"style":716},[803],{"type":33,"value":730},{"type":27,"tag":687,"props":805,"children":806},{"style":761},[807],{"type":33,"value":808},"'no-cache, no-transform'",{"type":27,"tag":687,"props":810,"children":811},{"style":716},[812],{"type":33,"value":778},{"type":27,"tag":687,"props":814,"children":815},{"class":689,"line":20},[816,820,824,828,833,837,842],{"type":27,"tag":687,"props":817,"children":818},{"style":716},[819],{"type":33,"value":749},{"type":27,"tag":687,"props":821,"children":822},{"style":710},[823],{"type":33,"value":754},{"type":27,"tag":687,"props":825,"children":826},{"style":716},[827],{"type":33,"value":719},{"type":27,"tag":687,"props":829,"children":830},{"style":761},[831],{"type":33,"value":832},"'Connection'",{"type":27,"tag":687,"props":834,"children":835},{"style":716},[836],{"type":33,"value":730},{"type":27,"tag":687,"props":838,"children":839},{"style":761},[840],{"type":33,"value":841},"'keep-alive'",{"type":27,"tag":687,"props":843,"children":844},{"style":716},[845],{"type":33,"value":778},{"type":27,"tag":687,"props":847,"children":849},{"class":689,"line":848},5,[850],{"type":27,"tag":687,"props":851,"children":852},{"emptyLinePlaceholder":18},[853],{"type":33,"value":854},"\n",{"type":27,"tag":687,"props":856,"children":858},{"class":689,"line":857},6,[859,864,869,874,879,885,890,895,900],{"type":27,"tag":687,"props":860,"children":861},{"style":694},[862],{"type":33,"value":863},"  for",{"type":27,"tag":687,"props":865,"children":866},{"style":694},[867],{"type":33,"value":868}," await",{"type":27,"tag":687,"props":870,"children":871},{"style":716},[872],{"type":33,"value":873}," (",{"type":27,"tag":687,"props":875,"children":876},{"style":694},[877],{"type":33,"value":878},"const",{"type":27,"tag":687,"props":880,"children":882},{"style":881},"--shiki-default:#79B8FF",[883],{"type":33,"value":884}," chunk",{"type":27,"tag":687,"props":886,"children":887},{"style":694},[888],{"type":33,"value":889}," of",{"type":27,"tag":687,"props":891,"children":892},{"style":716},[893],{"type":33,"value":894}," llm.",{"type":27,"tag":687,"props":896,"children":897},{"style":710},[898],{"type":33,"value":899},"stream",{"type":27,"tag":687,"props":901,"children":902},{"style":716},[903],{"type":33,"value":904},"(req.body.messages)) {\n",{"type":27,"tag":687,"props":906,"children":908},{"class":689,"line":907},7,[909,914,919,923,928,933,938,943,948,953,958,963,968,973],{"type":27,"tag":687,"props":910,"children":911},{"style":716},[912],{"type":33,"value":913},"    res.",{"type":27,"tag":687,"props":915,"children":916},{"style":710},[917],{"type":33,"value":918},"write",{"type":27,"tag":687,"props":920,"children":921},{"style":716},[922],{"type":33,"value":719},{"type":27,"tag":687,"props":924,"children":925},{"style":761},[926],{"type":33,"value":927},"`data: ${",{"type":27,"tag":687,"props":929,"children":930},{"style":881},[931],{"type":33,"value":932},"JSON",{"type":27,"tag":687,"props":934,"children":935},{"style":761},[936],{"type":33,"value":937},".",{"type":27,"tag":687,"props":939,"children":940},{"style":710},[941],{"type":33,"value":942},"stringify",{"type":27,"tag":687,"props":944,"children":945},{"style":761},[946],{"type":33,"value":947},"({ delta: ",{"type":27,"tag":687,"props":949,"children":950},{"style":716},[951],{"type":33,"value":952},"chunk",{"type":27,"tag":687,"props":954,"children":955},{"style":761},[956],{"type":33,"value":957}," })",{"type":27,"tag":687,"props":959,"children":960},{"style":761},[961],{"type":33,"value":962},"}",{"type":27,"tag":687,"props":964,"children":965},{"style":881},[966],{"type":33,"value":967},"\\n\\n",{"type":27,"tag":687,"props":969,"children":970},{"style":761},[971],{"type":33,"value":972},"`",{"type":27,"tag":687,"props":974,"children":975},{"style":716},[976],{"type":33,"value":778},{"type":27,"tag":687,"props":978,"children":980},{"class":689,"line":979},8,[981],{"type":27,"tag":687,"props":982,"children":983},{"style":716},[984],{"type":33,"value":985},"  }\n",{"type":27,"tag":687,"props":987,"children":989},{"class":689,"line":988},9,[990],{"type":27,"tag":687,"props":991,"children":992},{"emptyLinePlaceholder":18},[993],{"type":33,"value":854},{"type":27,"tag":687,"props":995,"children":997},{"class":689,"line":996},10,[998,1002,1006,1010,1015,1020,1025,1029,1034],{"type":27,"tag":687,"props":999,"children":1000},{"style":716},[1001],{"type":33,"value":749},{"type":27,"tag":687,"props":1003,"children":1004},{"style":710},[1005],{"type":33,"value":918},{"type":27,"tag":687,"props":1007,"children":1008},{"style":716},[1009],{"type":33,"value":719},{"type":27,"tag":687,"props":1011,"children":1012},{"style":761},[1013],{"type":33,"value":1014},"'event: done",{"type":27,"tag":687,"props":1016,"children":1017},{"style":881},[1018],{"type":33,"value":1019},"\\n",{"type":27,"tag":687,"props":1021,"children":1022},{"style":761},[1023],{"type":33,"value":1024},"data: {}",{"type":27,"tag":687,"props":1026,"children":1027},{"style":881},[1028],{"type":33,"value":967},{"type":27,"tag":687,"props":1030,"children":1031},{"style":761},[1032],{"type":33,"value":1033},"'",{"type":27,"tag":687,"props":1035,"children":1036},{"style":716},[1037],{"type":33,"value":778},{"type":27,"tag":687,"props":1039,"children":1041},{"class":689,"line":1040},11,[1042,1046,1051],{"type":27,"tag":687,"props":1043,"children":1044},{"style":716},[1045],{"type":33,"value":749},{"type":27,"tag":687,"props":1047,"children":1048},{"style":710},[1049],{"type":33,"value":1050},"end",{"type":27,"tag":687,"props":1052,"children":1053},{"style":716},[1054],{"type":33,"value":1055},"()\n",{"type":27,"tag":687,"props":1057,"children":1059},{"class":689,"line":1058},12,[1060],{"type":27,"tag":687,"props":1061,"children":1062},{"style":716},[1063],{"type":33,"value":1064},"}\n",{"type":27,"tag":35,"props":1066,"children":1067},{},[1068],{"type":33,"value":1069},"代码看起来不复杂。",{"type":27,"tag":35,"props":1071,"children":1072},{},[1073],{"type":33,"value":1074},"真正复杂的是，你要确认：",{"type":27,"tag":51,"props":1076,"children":1077},{},[1078,1083,1088,1093],{"type":27,"tag":55,"props":1079,"children":1080},{},[1081],{"type":33,"value":1082},"网关是否允许持续流式透传",{"type":27,"tag":55,"props":1084,"children":1085},{},[1086],{"type":33,"value":1087},"代理层是否会缓冲响应",{"type":27,"tag":55,"props":1089,"children":1090},{},[1091],{"type":33,"value":1092},"CDN 是否会把流式连接处理坏",{"type":27,"tag":55,"props":1094,"children":1095},{},[1096],{"type":33,"value":1097},"前端断开时服务端能否及时停止上游请求",{"type":27,"tag":243,"props":1099,"children":1101},{"id":1100},"_32-最常见的链路问题",[1102],{"type":33,"value":1103},"3.2 最常见的链路问题",{"type":27,"tag":1105,"props":1106,"children":1108},"h4",{"id":1107},"问题-a代理缓冲",[1109],{"type":33,"value":1110},"问题 A：代理缓冲",{"type":27,"tag":35,"props":1112,"children":1113},{},[1114],{"type":33,"value":1115},"有些代理默认会缓冲响应，导致你以为自己在流式输出，用户实际还是最后一次性收到。",{"type":27,"tag":1105,"props":1117,"children":1119},{"id":1118},"问题-b超时配置过短",[1120],{"type":33,"value":1121},"问题 B：超时配置过短",{"type":27,"tag":35,"props":1123,"children":1124},{},[1125],{"type":33,"value":1126},"长回答还没结束，链路已经被代理断开。",{"type":27,"tag":1105,"props":1128,"children":1130},{"id":1129},"问题-c服务端只顾输出不管取消",[1131],{"type":33,"value":1132},"问题 C：服务端只顾输出，不管取消",{"type":27,"tag":35,"props":1134,"children":1135},{},[1136],{"type":33,"value":1137},"用户关闭页面了，模型还在后台继续生成，这会平白浪费 token 和资源。",{"type":27,"tag":200,"props":1139,"children":1140},{},[],{"type":27,"tag":204,"props":1142,"children":1144},{"id":1143},"四前端增量渲染不是把文本拼起来就完事",[1145],{"type":33,"value":1146},"四、前端增量渲染：不是把文本拼起来就完事",{"type":27,"tag":35,"props":1148,"children":1149},{},[1150],{"type":33,"value":1151},"很多前端在做 AI 聊天时，会先写一个最简单版本：",{"type":27,"tag":51,"props":1153,"children":1154},{},[1155,1160,1165],{"type":27,"tag":55,"props":1156,"children":1157},{},[1158],{"type":33,"value":1159},"收到一个 chunk",{"type":27,"tag":55,"props":1161,"children":1162},{},[1163],{"type":33,"value":1164},"直接 append 到字符串尾部",{"type":27,"tag":55,"props":1166,"children":1167},{},[1168],{"type":33,"value":1169},"然后重新渲染整块内容",{"type":27,"tag":35,"props":1171,"children":1172},{},[1173],{"type":33,"value":1174},"这在 demo 阶段够用，但实际产品里会遇到很多细节问题。",{"type":27,"tag":243,"props":1176,"children":1178},{"id":1177},"_41-需要处理的四种状态",[1179],{"type":33,"value":1180},"4.1 需要处理的四种状态",{"type":27,"tag":76,"props":1182,"children":1183},{},[1184,1189,1194,1199],{"type":27,"tag":55,"props":1185,"children":1186},{},[1187],{"type":33,"value":1188},"正在生成",{"type":27,"tag":55,"props":1190,"children":1191},{},[1192],{"type":33,"value":1193},"用户主动取消",{"type":27,"tag":55,"props":1195,"children":1196},{},[1197],{"type":33,"value":1198},"网络中断或流式异常终止",{"type":27,"tag":55,"props":1200,"children":1201},{},[1202],{"type":33,"value":1203},"生成完成后进入可复制 / 可引用状态",{"type":27,"tag":243,"props":1205,"children":1207},{"id":1206},"_42-一个-react-风格的思路示例",[1208],{"type":33,"value":1209},"4.2 一个 React 风格的思路示例",{"type":27,"tag":677,"props":1211,"children":1213},{"className":679,"code":1212,"language":681,"meta":7,"style":7},"const [message, setMessage] = useState('')\nconst [status, setStatus] = useState\u003C'idle' | 'streaming' | 'done' | 'error'>('idle')\n\nfunction appendDelta(delta: string) {\n  setMessage(prev => prev + delta)\n}\n",[1214],{"type":27,"tag":146,"props":1215,"children":1216},{"__ignoreMap":7},[1217,1271,1359,1366,1402,1439],{"type":27,"tag":687,"props":1218,"children":1219},{"class":689,"line":690},[1220,1224,1229,1234,1238,1243,1248,1253,1258,1262,1267],{"type":27,"tag":687,"props":1221,"children":1222},{"style":694},[1223],{"type":33,"value":878},{"type":27,"tag":687,"props":1225,"children":1226},{"style":716},[1227],{"type":33,"value":1228}," [",{"type":27,"tag":687,"props":1230,"children":1231},{"style":881},[1232],{"type":33,"value":1233},"message",{"type":27,"tag":687,"props":1235,"children":1236},{"style":716},[1237],{"type":33,"value":730},{"type":27,"tag":687,"props":1239,"children":1240},{"style":881},[1241],{"type":33,"value":1242},"setMessage",{"type":27,"tag":687,"props":1244,"children":1245},{"style":716},[1246],{"type":33,"value":1247},"] ",{"type":27,"tag":687,"props":1249,"children":1250},{"style":694},[1251],{"type":33,"value":1252},"=",{"type":27,"tag":687,"props":1254,"children":1255},{"style":710},[1256],{"type":33,"value":1257}," useState",{"type":27,"tag":687,"props":1259,"children":1260},{"style":716},[1261],{"type":33,"value":719},{"type":27,"tag":687,"props":1263,"children":1264},{"style":761},[1265],{"type":33,"value":1266},"''",{"type":27,"tag":687,"props":1268,"children":1269},{"style":716},[1270],{"type":33,"value":778},{"type":27,"tag":687,"props":1272,"children":1273},{"class":689,"line":743},[1274,1278,1282,1287,1291,1296,1300,1304,1308,1313,1318,1323,1328,1332,1337,1341,1346,1351,1355],{"type":27,"tag":687,"props":1275,"children":1276},{"style":694},[1277],{"type":33,"value":878},{"type":27,"tag":687,"props":1279,"children":1280},{"style":716},[1281],{"type":33,"value":1228},{"type":27,"tag":687,"props":1283,"children":1284},{"style":881},[1285],{"type":33,"value":1286},"status",{"type":27,"tag":687,"props":1288,"children":1289},{"style":716},[1290],{"type":33,"value":730},{"type":27,"tag":687,"props":1292,"children":1293},{"style":881},[1294],{"type":33,"value":1295},"setStatus",{"type":27,"tag":687,"props":1297,"children":1298},{"style":716},[1299],{"type":33,"value":1247},{"type":27,"tag":687,"props":1301,"children":1302},{"style":694},[1303],{"type":33,"value":1252},{"type":27,"tag":687,"props":1305,"children":1306},{"style":710},[1307],{"type":33,"value":1257},{"type":27,"tag":687,"props":1309,"children":1310},{"style":716},[1311],{"type":33,"value":1312},"\u003C",{"type":27,"tag":687,"props":1314,"children":1315},{"style":761},[1316],{"type":33,"value":1317},"'idle'",{"type":27,"tag":687,"props":1319,"children":1320},{"style":694},[1321],{"type":33,"value":1322}," |",{"type":27,"tag":687,"props":1324,"children":1325},{"style":761},[1326],{"type":33,"value":1327}," 'streaming'",{"type":27,"tag":687,"props":1329,"children":1330},{"style":694},[1331],{"type":33,"value":1322},{"type":27,"tag":687,"props":1333,"children":1334},{"style":761},[1335],{"type":33,"value":1336}," 'done'",{"type":27,"tag":687,"props":1338,"children":1339},{"style":694},[1340],{"type":33,"value":1322},{"type":27,"tag":687,"props":1342,"children":1343},{"style":761},[1344],{"type":33,"value":1345}," 'error'",{"type":27,"tag":687,"props":1347,"children":1348},{"style":716},[1349],{"type":33,"value":1350},">(",{"type":27,"tag":687,"props":1352,"children":1353},{"style":761},[1354],{"type":33,"value":1317},{"type":27,"tag":687,"props":1356,"children":1357},{"style":716},[1358],{"type":33,"value":778},{"type":27,"tag":687,"props":1360,"children":1361},{"class":689,"line":781},[1362],{"type":27,"tag":687,"props":1363,"children":1364},{"emptyLinePlaceholder":18},[1365],{"type":33,"value":854},{"type":27,"tag":687,"props":1367,"children":1368},{"class":689,"line":20},[1369,1374,1379,1383,1388,1393,1398],{"type":27,"tag":687,"props":1370,"children":1371},{"style":694},[1372],{"type":33,"value":1373},"function",{"type":27,"tag":687,"props":1375,"children":1376},{"style":710},[1377],{"type":33,"value":1378}," appendDelta",{"type":27,"tag":687,"props":1380,"children":1381},{"style":716},[1382],{"type":33,"value":719},{"type":27,"tag":687,"props":1384,"children":1385},{"style":722},[1386],{"type":33,"value":1387},"delta",{"type":27,"tag":687,"props":1389,"children":1390},{"style":694},[1391],{"type":33,"value":1392},":",{"type":27,"tag":687,"props":1394,"children":1395},{"style":881},[1396],{"type":33,"value":1397}," string",{"type":27,"tag":687,"props":1399,"children":1400},{"style":716},[1401],{"type":33,"value":740},{"type":27,"tag":687,"props":1403,"children":1404},{"class":689,"line":848},[1405,1410,1414,1419,1424,1429,1434],{"type":27,"tag":687,"props":1406,"children":1407},{"style":710},[1408],{"type":33,"value":1409},"  setMessage",{"type":27,"tag":687,"props":1411,"children":1412},{"style":716},[1413],{"type":33,"value":719},{"type":27,"tag":687,"props":1415,"children":1416},{"style":722},[1417],{"type":33,"value":1418},"prev",{"type":27,"tag":687,"props":1420,"children":1421},{"style":694},[1422],{"type":33,"value":1423}," =>",{"type":27,"tag":687,"props":1425,"children":1426},{"style":716},[1427],{"type":33,"value":1428}," prev ",{"type":27,"tag":687,"props":1430,"children":1431},{"style":694},[1432],{"type":33,"value":1433},"+",{"type":27,"tag":687,"props":1435,"children":1436},{"style":716},[1437],{"type":33,"value":1438}," delta)\n",{"type":27,"tag":687,"props":1440,"children":1441},{"class":689,"line":857},[1442],{"type":27,"tag":687,"props":1443,"children":1444},{"style":716},[1445],{"type":33,"value":1064},{"type":27,"tag":35,"props":1447,"children":1448},{},[1449],{"type":33,"value":1450},"表面看很简单，但一旦内容里有：",{"type":27,"tag":51,"props":1452,"children":1453},{},[1454,1459,1464,1469],{"type":27,"tag":55,"props":1455,"children":1456},{},[1457],{"type":33,"value":1458},"Markdown",{"type":27,"tag":55,"props":1460,"children":1461},{},[1462],{"type":33,"value":1463},"代码块",{"type":27,"tag":55,"props":1465,"children":1466},{},[1467],{"type":33,"value":1468},"表格",{"type":27,"tag":55,"props":1470,"children":1471},{},[1472],{"type":33,"value":1473},"引用片段",{"type":27,"tag":35,"props":1475,"children":1476},{},[1477],{"type":33,"value":1478},"你就要考虑：",{"type":27,"tag":51,"props":1480,"children":1481},{},[1482,1487,1492],{"type":27,"tag":55,"props":1483,"children":1484},{},[1485],{"type":33,"value":1486},"每个 chunk 是否会打断语法结构",{"type":27,"tag":55,"props":1488,"children":1489},{},[1490],{"type":33,"value":1491},"渲染频率是否过高导致卡顿",{"type":27,"tag":55,"props":1493,"children":1494},{},[1495],{"type":33,"value":1496},"是否要做节流更新",{"type":27,"tag":243,"props":1498,"children":1500},{"id":1499},"_43-一个更稳的做法",[1501],{"type":33,"value":1502},"4.3 一个更稳的做法",{"type":27,"tag":51,"props":1504,"children":1505},{},[1506,1511,1516],{"type":27,"tag":55,"props":1507,"children":1508},{},[1509],{"type":33,"value":1510},"先在内存里累积 chunk",{"type":27,"tag":55,"props":1512,"children":1513},{},[1514],{"type":33,"value":1515},"每隔 30~100ms 批量刷新一次 UI",{"type":27,"tag":55,"props":1517,"children":1518},{},[1519],{"type":33,"value":1520},"最终完成时再做完整 Markdown 渲染",{"type":27,"tag":35,"props":1522,"children":1523},{},[1524],{"type":33,"value":1525},"这样既保留“在持续输出”的感觉，也避免高频重渲染造成卡顿。",{"type":27,"tag":200,"props":1527,"children":1528},{},[],{"type":27,"tag":204,"props":1530,"children":1532},{"id":1531},"五取消控制一个成熟-ai-产品必须允许用户反悔",[1533],{"type":33,"value":1534},"五、取消控制：一个成熟 AI 产品必须允许“用户反悔”",{"type":27,"tag":35,"props":1536,"children":1537},{},[1538],{"type":33,"value":1539},"这点非常重要，但经常被忽略。",{"type":27,"tag":35,"props":1541,"children":1542},{},[1543],{"type":33,"value":1544},"在 AI 场景里，用户经常会：",{"type":27,"tag":51,"props":1546,"children":1547},{},[1548,1553,1558,1563],{"type":27,"tag":55,"props":1549,"children":1550},{},[1551],{"type":33,"value":1552},"问到一半改问题",{"type":27,"tag":55,"props":1554,"children":1555},{},[1556],{"type":33,"value":1557},"发现答案方向错了",{"type":27,"tag":55,"props":1559,"children":1560},{},[1561],{"type":33,"value":1562},"直接切换页面",{"type":27,"tag":55,"props":1564,"children":1565},{},[1566],{"type":33,"value":1567},"连续发两个问题",{"type":27,"tag":35,"props":1569,"children":1570},{},[1571],{"type":33,"value":1572},"如果你的系统不支持取消，结果会很糟：",{"type":27,"tag":51,"props":1574,"children":1575},{},[1576,1581,1586,1591],{"type":27,"tag":55,"props":1577,"children":1578},{},[1579],{"type":33,"value":1580},"旧请求还在跑",{"type":27,"tag":55,"props":1582,"children":1583},{},[1584],{"type":33,"value":1585},"新请求又开始跑",{"type":27,"tag":55,"props":1587,"children":1588},{},[1589],{"type":33,"value":1590},"token 成本增加",{"type":27,"tag":55,"props":1592,"children":1593},{},[1594],{"type":33,"value":1595},"UI 状态混乱",{"type":27,"tag":243,"props":1597,"children":1599},{"id":1598},"_51-前端取消示例",[1600],{"type":33,"value":1601},"5.1 前端取消示例",{"type":27,"tag":677,"props":1603,"children":1605},{"className":679,"code":1604,"language":681,"meta":7,"style":7},"const controller = new AbortController()\n\nfetch('/api/chat/stream', {\n  method: 'POST',\n  body: JSON.stringify(payload),\n  signal: controller.signal,\n})\n\nfunction stopGeneration() {\n  controller.abort()\n}\n",[1606],{"type":27,"tag":146,"props":1607,"children":1608},{"__ignoreMap":7},[1609,1640,1647,1669,1687,1712,1720,1728,1735,1752,1769],{"type":27,"tag":687,"props":1610,"children":1611},{"class":689,"line":690},[1612,1616,1621,1626,1631,1636],{"type":27,"tag":687,"props":1613,"children":1614},{"style":694},[1615],{"type":33,"value":878},{"type":27,"tag":687,"props":1617,"children":1618},{"style":881},[1619],{"type":33,"value":1620}," controller",{"type":27,"tag":687,"props":1622,"children":1623},{"style":694},[1624],{"type":33,"value":1625}," =",{"type":27,"tag":687,"props":1627,"children":1628},{"style":694},[1629],{"type":33,"value":1630}," new",{"type":27,"tag":687,"props":1632,"children":1633},{"style":710},[1634],{"type":33,"value":1635}," AbortController",{"type":27,"tag":687,"props":1637,"children":1638},{"style":716},[1639],{"type":33,"value":1055},{"type":27,"tag":687,"props":1641,"children":1642},{"class":689,"line":743},[1643],{"type":27,"tag":687,"props":1644,"children":1645},{"emptyLinePlaceholder":18},[1646],{"type":33,"value":854},{"type":27,"tag":687,"props":1648,"children":1649},{"class":689,"line":781},[1650,1655,1659,1664],{"type":27,"tag":687,"props":1651,"children":1652},{"style":710},[1653],{"type":33,"value":1654},"fetch",{"type":27,"tag":687,"props":1656,"children":1657},{"style":716},[1658],{"type":33,"value":719},{"type":27,"tag":687,"props":1660,"children":1661},{"style":761},[1662],{"type":33,"value":1663},"'/api/chat/stream'",{"type":27,"tag":687,"props":1665,"children":1666},{"style":716},[1667],{"type":33,"value":1668},", {\n",{"type":27,"tag":687,"props":1670,"children":1671},{"class":689,"line":20},[1672,1677,1682],{"type":27,"tag":687,"props":1673,"children":1674},{"style":716},[1675],{"type":33,"value":1676},"  method: ",{"type":27,"tag":687,"props":1678,"children":1679},{"style":761},[1680],{"type":33,"value":1681},"'POST'",{"type":27,"tag":687,"props":1683,"children":1684},{"style":716},[1685],{"type":33,"value":1686},",\n",{"type":27,"tag":687,"props":1688,"children":1689},{"class":689,"line":848},[1690,1695,1699,1703,1707],{"type":27,"tag":687,"props":1691,"children":1692},{"style":716},[1693],{"type":33,"value":1694},"  body: ",{"type":27,"tag":687,"props":1696,"children":1697},{"style":881},[1698],{"type":33,"value":932},{"type":27,"tag":687,"props":1700,"children":1701},{"style":716},[1702],{"type":33,"value":937},{"type":27,"tag":687,"props":1704,"children":1705},{"style":710},[1706],{"type":33,"value":942},{"type":27,"tag":687,"props":1708,"children":1709},{"style":716},[1710],{"type":33,"value":1711},"(payload),\n",{"type":27,"tag":687,"props":1713,"children":1714},{"class":689,"line":857},[1715],{"type":27,"tag":687,"props":1716,"children":1717},{"style":716},[1718],{"type":33,"value":1719},"  signal: controller.signal,\n",{"type":27,"tag":687,"props":1721,"children":1722},{"class":689,"line":907},[1723],{"type":27,"tag":687,"props":1724,"children":1725},{"style":716},[1726],{"type":33,"value":1727},"})\n",{"type":27,"tag":687,"props":1729,"children":1730},{"class":689,"line":979},[1731],{"type":27,"tag":687,"props":1732,"children":1733},{"emptyLinePlaceholder":18},[1734],{"type":33,"value":854},{"type":27,"tag":687,"props":1736,"children":1737},{"class":689,"line":988},[1738,1742,1747],{"type":27,"tag":687,"props":1739,"children":1740},{"style":694},[1741],{"type":33,"value":1373},{"type":27,"tag":687,"props":1743,"children":1744},{"style":710},[1745],{"type":33,"value":1746}," stopGeneration",{"type":27,"tag":687,"props":1748,"children":1749},{"style":716},[1750],{"type":33,"value":1751},"() {\n",{"type":27,"tag":687,"props":1753,"children":1754},{"class":689,"line":996},[1755,1760,1765],{"type":27,"tag":687,"props":1756,"children":1757},{"style":716},[1758],{"type":33,"value":1759},"  controller.",{"type":27,"tag":687,"props":1761,"children":1762},{"style":710},[1763],{"type":33,"value":1764},"abort",{"type":27,"tag":687,"props":1766,"children":1767},{"style":716},[1768],{"type":33,"value":1055},{"type":27,"tag":687,"props":1770,"children":1771},{"class":689,"line":1040},[1772],{"type":27,"tag":687,"props":1773,"children":1774},{"style":716},[1775],{"type":33,"value":1064},{"type":27,"tag":243,"props":1777,"children":1779},{"id":1778},"_52-服务端也必须感知取消",[1780],{"type":33,"value":1781},"5.2 服务端也必须感知取消",{"type":27,"tag":35,"props":1783,"children":1784},{},[1785],{"type":33,"value":1786},"前端断开只是第一步。更重要的是服务端能不能把这个取消传到上游模型调用层。",{"type":27,"tag":35,"props":1788,"children":1789},{},[1790],{"type":33,"value":1791},"否则链路前半段停了，后半段还在烧钱。",{"type":27,"tag":200,"props":1793,"children":1794},{},[],{"type":27,"tag":204,"props":1796,"children":1798},{"id":1797},"六实时搜索体验升级很大风险也放大得很快",[1799],{"type":33,"value":1800},"六、实时搜索：体验升级很大，风险也放大得很快",{"type":27,"tag":35,"props":1802,"children":1803},{},[1804],{"type":33,"value":1805},"很多 AI 搜索产品的第一眼惊艳，不是答案多聪明，而是：",{"type":27,"tag":51,"props":1807,"children":1808},{},[1809,1814,1819],{"type":27,"tag":55,"props":1810,"children":1811},{},[1812],{"type":33,"value":1813},"你刚输入几个字，就已经在给建议",{"type":27,"tag":55,"props":1815,"children":1816},{},[1817],{"type":33,"value":1818},"搜索结果逐步更新",{"type":27,"tag":55,"props":1820,"children":1821},{},[1822],{"type":33,"value":1823},"有些模块先出来，有些模块后补齐",{"type":27,"tag":35,"props":1825,"children":1826},{},[1827],{"type":33,"value":1828},"这类体验会让产品显得非常“活”。",{"type":27,"tag":35,"props":1830,"children":1831},{},[1832],{"type":33,"value":1833},"但工程上，实时搜索也是最容易把后端打穿的模块之一。",{"type":27,"tag":243,"props":1835,"children":1837},{"id":1836},"_61-为什么实时搜索容易出问题",[1838],{"type":33,"value":1839},"6.1 为什么实时搜索容易出问题",{"type":27,"tag":35,"props":1841,"children":1842},{},[1843],{"type":33,"value":1844},"因为输入是连续的。",{"type":27,"tag":35,"props":1846,"children":1847},{},[1848],{"type":33,"value":1849},"用户打“向量数据库怎么选”这 9 个字时，系统可能收到：",{"type":27,"tag":51,"props":1851,"children":1852},{},[1853,1858,1863,1868,1873,1878],{"type":27,"tag":55,"props":1854,"children":1855},{},[1856],{"type":33,"value":1857},"向",{"type":27,"tag":55,"props":1859,"children":1860},{},[1861],{"type":33,"value":1862},"向量",{"type":27,"tag":55,"props":1864,"children":1865},{},[1866],{"type":33,"value":1867},"向量数",{"type":27,"tag":55,"props":1869,"children":1870},{},[1871],{"type":33,"value":1872},"向量数据库",{"type":27,"tag":55,"props":1874,"children":1875},{},[1876],{"type":33,"value":1877},"向量数据库怎",{"type":27,"tag":55,"props":1879,"children":1880},{},[1881],{"type":33,"value":1882},"…",{"type":27,"tag":35,"props":1884,"children":1885},{},[1886],{"type":33,"value":1887},"如果每次都全量触发：",{"type":27,"tag":51,"props":1889,"children":1890},{},[1891,1896,1901],{"type":27,"tag":55,"props":1892,"children":1893},{},[1894],{"type":33,"value":1895},"检索",{"type":27,"tag":55,"props":1897,"children":1898},{},[1899],{"type":33,"value":1900},"rerank",{"type":27,"tag":55,"props":1902,"children":1903},{},[1904],{"type":33,"value":1905},"LLM 总结",{"type":27,"tag":35,"props":1907,"children":1908},{},[1909],{"type":33,"value":1910},"那系统会迅速进入“为了实时而疯狂自耗”的状态。",{"type":27,"tag":243,"props":1912,"children":1914},{"id":1913},"_62-三个必须做的控制",[1915],{"type":33,"value":1916},"6.2 三个必须做的控制",{"type":27,"tag":1105,"props":1918,"children":1920},{"id":1919},"a-防抖debounce",[1921],{"type":33,"value":1922},"A. 防抖（Debounce）",{"type":27,"tag":35,"props":1924,"children":1925},{},[1926],{"type":33,"value":1927},"不要每次按键都打请求。",{"type":27,"tag":1105,"props":1929,"children":1931},{"id":1930},"b-请求废弃",[1932],{"type":33,"value":1933},"B. 请求废弃",{"type":27,"tag":35,"props":1935,"children":1936},{},[1937],{"type":33,"value":1938},"旧请求结果回来时，如果已经不是当前输入，必须丢弃。",{"type":27,"tag":1105,"props":1940,"children":1942},{"id":1941},"c-分层搜索",[1943],{"type":33,"value":1944},"C. 分层搜索",{"type":27,"tag":35,"props":1946,"children":1947},{},[1948],{"type":33,"value":1949},"先做轻量建议，再做重型生成。",{"type":27,"tag":243,"props":1951,"children":1953},{"id":1952},"_63-一个简单防抖示例",[1954],{"type":33,"value":1955},"6.3 一个简单防抖示例",{"type":27,"tag":677,"props":1957,"children":1959},{"className":679,"code":1958,"language":681,"meta":7,"style":7},"let timer: ReturnType\u003Ctypeof setTimeout> | null = null\n\nfunction onInputChange(value: string) {\n  if (timer) clearTimeout(timer)\n\n  timer = setTimeout(() => {\n    search(value)\n  }, 250)\n}\n",[1960],{"type":27,"tag":146,"props":1961,"children":1962},{"__ignoreMap":7},[1963,2018,2025,2058,2081,2088,2120,2133,2150],{"type":27,"tag":687,"props":1964,"children":1965},{"class":689,"line":690},[1966,1971,1976,1980,1985,1989,1994,1999,2004,2009,2013],{"type":27,"tag":687,"props":1967,"children":1968},{"style":694},[1969],{"type":33,"value":1970},"let",{"type":27,"tag":687,"props":1972,"children":1973},{"style":716},[1974],{"type":33,"value":1975}," timer",{"type":27,"tag":687,"props":1977,"children":1978},{"style":694},[1979],{"type":33,"value":1392},{"type":27,"tag":687,"props":1981,"children":1982},{"style":710},[1983],{"type":33,"value":1984}," ReturnType",{"type":27,"tag":687,"props":1986,"children":1987},{"style":716},[1988],{"type":33,"value":1312},{"type":27,"tag":687,"props":1990,"children":1991},{"style":694},[1992],{"type":33,"value":1993},"typeof",{"type":27,"tag":687,"props":1995,"children":1996},{"style":716},[1997],{"type":33,"value":1998}," setTimeout> ",{"type":27,"tag":687,"props":2000,"children":2001},{"style":694},[2002],{"type":33,"value":2003},"|",{"type":27,"tag":687,"props":2005,"children":2006},{"style":881},[2007],{"type":33,"value":2008}," null",{"type":27,"tag":687,"props":2010,"children":2011},{"style":694},[2012],{"type":33,"value":1625},{"type":27,"tag":687,"props":2014,"children":2015},{"style":881},[2016],{"type":33,"value":2017}," null\n",{"type":27,"tag":687,"props":2019,"children":2020},{"class":689,"line":743},[2021],{"type":27,"tag":687,"props":2022,"children":2023},{"emptyLinePlaceholder":18},[2024],{"type":33,"value":854},{"type":27,"tag":687,"props":2026,"children":2027},{"class":689,"line":781},[2028,2032,2037,2041,2046,2050,2054],{"type":27,"tag":687,"props":2029,"children":2030},{"style":694},[2031],{"type":33,"value":1373},{"type":27,"tag":687,"props":2033,"children":2034},{"style":710},[2035],{"type":33,"value":2036}," onInputChange",{"type":27,"tag":687,"props":2038,"children":2039},{"style":716},[2040],{"type":33,"value":719},{"type":27,"tag":687,"props":2042,"children":2043},{"style":722},[2044],{"type":33,"value":2045},"value",{"type":27,"tag":687,"props":2047,"children":2048},{"style":694},[2049],{"type":33,"value":1392},{"type":27,"tag":687,"props":2051,"children":2052},{"style":881},[2053],{"type":33,"value":1397},{"type":27,"tag":687,"props":2055,"children":2056},{"style":716},[2057],{"type":33,"value":740},{"type":27,"tag":687,"props":2059,"children":2060},{"class":689,"line":20},[2061,2066,2071,2076],{"type":27,"tag":687,"props":2062,"children":2063},{"style":694},[2064],{"type":33,"value":2065},"  if",{"type":27,"tag":687,"props":2067,"children":2068},{"style":716},[2069],{"type":33,"value":2070}," (timer) ",{"type":27,"tag":687,"props":2072,"children":2073},{"style":710},[2074],{"type":33,"value":2075},"clearTimeout",{"type":27,"tag":687,"props":2077,"children":2078},{"style":716},[2079],{"type":33,"value":2080},"(timer)\n",{"type":27,"tag":687,"props":2082,"children":2083},{"class":689,"line":848},[2084],{"type":27,"tag":687,"props":2085,"children":2086},{"emptyLinePlaceholder":18},[2087],{"type":33,"value":854},{"type":27,"tag":687,"props":2089,"children":2090},{"class":689,"line":857},[2091,2096,2100,2105,2110,2115],{"type":27,"tag":687,"props":2092,"children":2093},{"style":716},[2094],{"type":33,"value":2095},"  timer ",{"type":27,"tag":687,"props":2097,"children":2098},{"style":694},[2099],{"type":33,"value":1252},{"type":27,"tag":687,"props":2101,"children":2102},{"style":710},[2103],{"type":33,"value":2104}," setTimeout",{"type":27,"tag":687,"props":2106,"children":2107},{"style":716},[2108],{"type":33,"value":2109},"(() ",{"type":27,"tag":687,"props":2111,"children":2112},{"style":694},[2113],{"type":33,"value":2114},"=>",{"type":27,"tag":687,"props":2116,"children":2117},{"style":716},[2118],{"type":33,"value":2119}," {\n",{"type":27,"tag":687,"props":2121,"children":2122},{"class":689,"line":907},[2123,2128],{"type":27,"tag":687,"props":2124,"children":2125},{"style":710},[2126],{"type":33,"value":2127},"    search",{"type":27,"tag":687,"props":2129,"children":2130},{"style":716},[2131],{"type":33,"value":2132},"(value)\n",{"type":27,"tag":687,"props":2134,"children":2135},{"class":689,"line":979},[2136,2141,2146],{"type":27,"tag":687,"props":2137,"children":2138},{"style":716},[2139],{"type":33,"value":2140},"  }, ",{"type":27,"tag":687,"props":2142,"children":2143},{"style":881},[2144],{"type":33,"value":2145},"250",{"type":27,"tag":687,"props":2147,"children":2148},{"style":716},[2149],{"type":33,"value":778},{"type":27,"tag":687,"props":2151,"children":2152},{"class":689,"line":988},[2153],{"type":27,"tag":687,"props":2154,"children":2155},{"style":716},[2156],{"type":33,"value":1064},{"type":27,"tag":35,"props":2158,"children":2159},{},[2160],{"type":33,"value":2161},"250ms 并不是标准答案，但它体现了一种思路：",{"type":27,"tag":216,"props":2163,"children":2164},{},[2165],{"type":27,"tag":35,"props":2166,"children":2167},{},[2168],{"type":33,"value":2169},"实时搜索不是“越快越好”，而是“用户体感足够快，同时后端可承受”。",{"type":27,"tag":200,"props":2171,"children":2172},{},[],{"type":27,"tag":204,"props":2174,"children":2176},{"id":2175},"七增量搜索结果不是一次请求只出一种内容",[2177],{"type":33,"value":2178},"七、增量搜索结果：不是一次请求只出一种内容",{"type":27,"tag":35,"props":2180,"children":2181},{},[2182],{"type":33,"value":2183},"更成熟的 AI 搜索体验，往往会把结果拆层返回。",{"type":27,"tag":35,"props":2185,"children":2186},{},[2187],{"type":33,"value":2188},"例如：",{"type":27,"tag":76,"props":2190,"children":2191},{},[2192,2197,2202],{"type":27,"tag":55,"props":2193,"children":2194},{},[2195],{"type":33,"value":2196},"先返回关键词建议",{"type":27,"tag":55,"props":2198,"children":2199},{},[2200],{"type":33,"value":2201},"再返回传统检索结果列表",{"type":27,"tag":55,"props":2203,"children":2204},{},[2205],{"type":33,"value":2206},"最后流式返回 AI 总结",{"type":27,"tag":35,"props":2208,"children":2209},{},[2210],{"type":33,"value":2211},"这样用户在第一个几百毫秒内就能获得反馈，不用把所有体验都绑死在 LLM 最终回答上。",{"type":27,"tag":243,"props":2213,"children":2215},{"id":2214},"_71-为什么分层结果特别重要",[2216],{"type":33,"value":2217},"7.1 为什么分层结果特别重要",{"type":27,"tag":35,"props":2219,"children":2220},{},[2221],{"type":33,"value":2222},"因为它让产品从“全有或全无”变成“逐步完成”：",{"type":27,"tag":51,"props":2224,"children":2225},{},[2226,2231,2236],{"type":27,"tag":55,"props":2227,"children":2228},{},[2229],{"type":33,"value":2230},"就算 AI 总结稍慢，用户也已经能开始浏览原始结果",{"type":27,"tag":55,"props":2232,"children":2233},{},[2234],{"type":33,"value":2235},"就算生成失败，传统检索结果仍然可用",{"type":27,"tag":55,"props":2237,"children":2238},{},[2239],{"type":33,"value":2240},"体验和稳定性都更可控",{"type":27,"tag":35,"props":2242,"children":2243},{},[2244],{"type":33,"value":2245},"这也是很多 AI 搜索产品真正好用的原因：",{"type":27,"tag":216,"props":2247,"children":2248},{},[2249],{"type":27,"tag":35,"props":2250,"children":2251},{},[2252],{"type":33,"value":2253},"它们不是用 AI 替代搜索，而是让 AI 增强搜索。",{"type":27,"tag":200,"props":2255,"children":2256},{},[],{"type":27,"tag":204,"props":2258,"children":2260},{"id":2259},"八观测流式体验最怕看起来偶尔卡一下但没人知道为什么",[2261],{"type":33,"value":2262},"八、观测：流式体验最怕“看起来偶尔卡一下，但没人知道为什么”",{"type":27,"tag":35,"props":2264,"children":2265},{},[2266],{"type":33,"value":2267},"流式链路一旦出问题，往往不如普通接口那样容易发现。",{"type":27,"tag":35,"props":2269,"children":2270},{},[2271],{"type":33,"value":2272},"因为它常见的故障不是直接 500，而是：",{"type":27,"tag":51,"props":2274,"children":2275},{},[2276,2281,2286,2291,2296],{"type":27,"tag":55,"props":2277,"children":2278},{},[2279],{"type":33,"value":2280},"首包延迟异常高",{"type":27,"tag":55,"props":2282,"children":2283},{},[2284],{"type":33,"value":2285},"中途断流",{"type":27,"tag":55,"props":2287,"children":2288},{},[2289],{"type":33,"value":2290},"chunk 间隔抖动很大",{"type":27,"tag":55,"props":2292,"children":2293},{},[2294],{"type":33,"value":2295},"代理偶发缓冲",{"type":27,"tag":55,"props":2297,"children":2298},{},[2299],{"type":33,"value":2300},"客户端取消没有真正取消上游",{"type":27,"tag":243,"props":2302,"children":2304},{"id":2303},"_81-至少该看这些指标",[2305],{"type":33,"value":2306},"8.1 至少该看这些指标",{"type":27,"tag":555,"props":2308,"children":2309},{},[2310,2326],{"type":27,"tag":559,"props":2311,"children":2312},{},[2313],{"type":27,"tag":563,"props":2314,"children":2315},{},[2316,2321],{"type":27,"tag":567,"props":2317,"children":2318},{},[2319],{"type":33,"value":2320},"指标",{"type":27,"tag":567,"props":2322,"children":2323},{},[2324],{"type":33,"value":2325},"价值",{"type":27,"tag":583,"props":2327,"children":2328},{},[2329,2342,2355,2368,2381,2394],{"type":27,"tag":563,"props":2330,"children":2331},{},[2332,2337],{"type":27,"tag":590,"props":2333,"children":2334},{},[2335],{"type":33,"value":2336},"首字节时间 / 首 token 时间",{"type":27,"tag":590,"props":2338,"children":2339},{},[2340],{"type":33,"value":2341},"看第一反馈够不够快",{"type":27,"tag":563,"props":2343,"children":2344},{},[2345,2350],{"type":27,"tag":590,"props":2346,"children":2347},{},[2348],{"type":33,"value":2349},"平均流式持续时间",{"type":27,"tag":590,"props":2351,"children":2352},{},[2353],{"type":33,"value":2354},"看长响应表现",{"type":27,"tag":563,"props":2356,"children":2357},{},[2358,2363],{"type":27,"tag":590,"props":2359,"children":2360},{},[2361],{"type":33,"value":2362},"中断率",{"type":27,"tag":590,"props":2364,"children":2365},{},[2366],{"type":33,"value":2367},"看链路稳定性",{"type":27,"tag":563,"props":2369,"children":2370},{},[2371,2376],{"type":27,"tag":590,"props":2372,"children":2373},{},[2374],{"type":33,"value":2375},"用户取消率",{"type":27,"tag":590,"props":2377,"children":2378},{},[2379],{"type":33,"value":2380},"看答案是否常常不对路",{"type":27,"tag":563,"props":2382,"children":2383},{},[2384,2389],{"type":27,"tag":590,"props":2385,"children":2386},{},[2387],{"type":33,"value":2388},"废弃请求比例",{"type":27,"tag":590,"props":2390,"children":2391},{},[2392],{"type":33,"value":2393},"看实时搜索是否过度触发",{"type":27,"tag":563,"props":2395,"children":2396},{},[2397,2402],{"type":27,"tag":590,"props":2398,"children":2399},{},[2400],{"type":33,"value":2401},"单次输入触发请求数",{"type":27,"tag":590,"props":2403,"children":2404},{},[2405],{"type":33,"value":2406},"看前端节流是否失效",{"type":27,"tag":243,"props":2408,"children":2410},{"id":2409},"_82-为什么这些指标重要",[2411],{"type":33,"value":2412},"8.2 为什么这些指标重要",{"type":27,"tag":35,"props":2414,"children":2415},{},[2416],{"type":33,"value":2417},"因为你要区分：",{"type":27,"tag":51,"props":2419,"children":2420},{},[2421,2426,2431,2436],{"type":27,"tag":55,"props":2422,"children":2423},{},[2424],{"type":33,"value":2425},"是模型慢",{"type":27,"tag":55,"props":2427,"children":2428},{},[2429],{"type":33,"value":2430},"是代理层慢",{"type":27,"tag":55,"props":2432,"children":2433},{},[2434],{"type":33,"value":2435},"是前端触发太频繁",{"type":27,"tag":55,"props":2437,"children":2438},{},[2439],{"type":33,"value":2440},"是用户根本不需要这么多实时请求",{"type":27,"tag":35,"props":2442,"children":2443},{},[2444],{"type":33,"value":2445},"没有观测，流式体验优化很容易停留在“感觉上好像顺一点”。",{"type":27,"tag":200,"props":2447,"children":2448},{},[],{"type":27,"tag":204,"props":2450,"children":2452},{"id":2451},"九三个高频落地坑提前避开",[2453],{"type":33,"value":2454},"九、三个高频落地坑，提前避开",{"type":27,"tag":243,"props":2456,"children":2458},{"id":2457},"_91-坑一把所有实时体验都绑在-llm-上",[2459],{"type":33,"value":2460},"9.1 坑一：把所有实时体验都绑在 LLM 上",{"type":27,"tag":35,"props":2462,"children":2463},{},[2464],{"type":33,"value":2465},"这样会导致：",{"type":27,"tag":51,"props":2467,"children":2468},{},[2469,2474,2479],{"type":27,"tag":55,"props":2470,"children":2471},{},[2472],{"type":33,"value":2473},"首反馈一定慢",{"type":27,"tag":55,"props":2475,"children":2476},{},[2477],{"type":33,"value":2478},"成本高",{"type":27,"tag":55,"props":2480,"children":2481},{},[2482],{"type":33,"value":2483},"一个环节失败，全页面都空白",{"type":27,"tag":35,"props":2485,"children":2486},{},[2487],{"type":33,"value":2488},"更好的做法是：",{"type":27,"tag":51,"props":2490,"children":2491},{},[2492,2497,2502],{"type":27,"tag":55,"props":2493,"children":2494},{},[2495],{"type":33,"value":2496},"检索结果先行",{"type":27,"tag":55,"props":2498,"children":2499},{},[2500],{"type":33,"value":2501},"AI 结果后补",{"type":27,"tag":55,"props":2503,"children":2504},{},[2505],{"type":33,"value":2506},"非核心模块可降级",{"type":27,"tag":243,"props":2508,"children":2510},{"id":2509},"_92-坑二只做流式不做取消和废弃控制",[2511],{"type":33,"value":2512},"9.2 坑二：只做流式，不做取消和废弃控制",{"type":27,"tag":35,"props":2514,"children":2515},{},[2516],{"type":33,"value":2517},"这样系统会被很多无效请求拖累，尤其在实时搜索里最明显。",{"type":27,"tag":243,"props":2519,"children":2521},{"id":2520},"_93-坑三服务端能流前端不会优雅接收",[2522],{"type":33,"value":2523},"9.3 坑三：服务端能流，前端不会“优雅接收”",{"type":27,"tag":35,"props":2525,"children":2526},{},[2527],{"type":33,"value":2528},"比如：",{"type":27,"tag":51,"props":2530,"children":2531},{},[2532,2537,2542],{"type":27,"tag":55,"props":2533,"children":2534},{},[2535],{"type":33,"value":2536},"代码块断裂渲染",{"type":27,"tag":55,"props":2538,"children":2539},{},[2540],{"type":33,"value":2541},"高频 setState 卡顿",{"type":27,"tag":55,"props":2543,"children":2544},{},[2545],{"type":33,"value":2546},"失败后没有明显状态提示",{"type":27,"tag":35,"props":2548,"children":2549},{},[2550],{"type":33,"value":2551},"这会让用户觉得系统“有点高级，但不好用”。",{"type":27,"tag":200,"props":2553,"children":2554},{},[],{"type":27,"tag":204,"props":2556,"children":2558},{"id":2557},"十给团队的流式响应与实时搜索检查清单",[2559],{"type":33,"value":2560},"十、给团队的流式响应与实时搜索检查清单",{"type":27,"tag":243,"props":2562,"children":2564},{"id":2563},"传输层",[2565],{"type":33,"value":2563},{"type":27,"tag":51,"props":2567,"children":2570},{"className":2568},[2569],"contains-task-list",[2571,2595,2604],{"type":27,"tag":55,"props":2572,"children":2575},{"className":2573},[2574],"task-list-item",[2576,2581,2583,2588,2590],{"type":27,"tag":2577,"props":2578,"children":2580},"input",{"disabled":18,"type":2579},"checkbox",[],{"type":33,"value":2582}," 是否根据场景选择 ",{"type":27,"tag":146,"props":2584,"children":2586},{"className":2585},[],[2587],{"type":33,"value":14},{"type":33,"value":2589}," / ",{"type":27,"tag":146,"props":2591,"children":2593},{"className":2592},[],[2594],{"type":33,"value":176},{"type":27,"tag":55,"props":2596,"children":2598},{"className":2597},[2574],[2599,2602],{"type":27,"tag":2577,"props":2600,"children":2601},{"disabled":18,"type":2579},[],{"type":33,"value":2603}," 代理和网关是否支持流式透传",{"type":27,"tag":55,"props":2605,"children":2607},{"className":2606},[2574],[2608,2611],{"type":27,"tag":2577,"props":2609,"children":2610},{"disabled":18,"type":2579},[],{"type":33,"value":2612}," 是否配置了合理超时与断开处理",{"type":27,"tag":243,"props":2614,"children":2616},{"id":2615},"前端体验层",[2617],{"type":33,"value":2615},{"type":27,"tag":51,"props":2619,"children":2621},{"className":2620},[2569],[2622,2631,2640,2649],{"type":27,"tag":55,"props":2623,"children":2625},{"className":2624},[2574],[2626,2629],{"type":27,"tag":2577,"props":2627,"children":2628},{"disabled":18,"type":2579},[],{"type":33,"value":2630}," 是否有首反馈状态",{"type":27,"tag":55,"props":2632,"children":2634},{"className":2633},[2574],[2635,2638],{"type":27,"tag":2577,"props":2636,"children":2637},{"disabled":18,"type":2579},[],{"type":33,"value":2639}," 是否支持取消生成",{"type":27,"tag":55,"props":2641,"children":2643},{"className":2642},[2574],[2644,2647],{"type":27,"tag":2577,"props":2645,"children":2646},{"disabled":18,"type":2579},[],{"type":33,"value":2648}," 是否对增量渲染做节流或批量更新",{"type":27,"tag":55,"props":2650,"children":2652},{"className":2651},[2574],[2653,2656],{"type":27,"tag":2577,"props":2654,"children":2655},{"disabled":18,"type":2579},[],{"type":33,"value":2657}," 是否对旧请求结果做废弃处理",{"type":27,"tag":243,"props":2659,"children":2661},{"id":2660},"搜索治理层",[2662],{"type":33,"value":2660},{"type":27,"tag":51,"props":2664,"children":2666},{"className":2665},[2569],[2667,2676,2685],{"type":27,"tag":55,"props":2668,"children":2670},{"className":2669},[2574],[2671,2674],{"type":27,"tag":2577,"props":2672,"children":2673},{"disabled":18,"type":2579},[],{"type":33,"value":2675}," 是否有防抖",{"type":27,"tag":55,"props":2677,"children":2679},{"className":2678},[2574],[2680,2683],{"type":27,"tag":2577,"props":2681,"children":2682},{"disabled":18,"type":2579},[],{"type":33,"value":2684}," 是否区分轻量建议和重型生成",{"type":27,"tag":55,"props":2686,"children":2688},{"className":2687},[2574],[2689,2692],{"type":27,"tag":2577,"props":2690,"children":2691},{"disabled":18,"type":2579},[],{"type":33,"value":2693}," 是否能在 AI 失败时保留基础检索结果",{"type":27,"tag":243,"props":2695,"children":2697},{"id":2696},"观测层",[2698],{"type":33,"value":2696},{"type":27,"tag":51,"props":2700,"children":2702},{"className":2701},[2569],[2703,2712,2721],{"type":27,"tag":55,"props":2704,"children":2706},{"className":2705},[2574],[2707,2710],{"type":27,"tag":2577,"props":2708,"children":2709},{"disabled":18,"type":2579},[],{"type":33,"value":2711}," 是否监控首 token 时间和中断率",{"type":27,"tag":55,"props":2713,"children":2715},{"className":2714},[2574],[2716,2719],{"type":27,"tag":2577,"props":2717,"children":2718},{"disabled":18,"type":2579},[],{"type":33,"value":2720}," 是否统计取消率和废弃请求比例",{"type":27,"tag":55,"props":2722,"children":2724},{"className":2723},[2574],[2725,2728],{"type":27,"tag":2577,"props":2726,"children":2727},{"disabled":18,"type":2579},[],{"type":33,"value":2729}," 是否能区分链路问题来自前端、代理还是模型侧",{"type":27,"tag":200,"props":2731,"children":2732},{},[],{"type":27,"tag":204,"props":2734,"children":2736},{"id":2735},"总结",[2737],{"type":33,"value":2735},{"type":27,"tag":35,"props":2739,"children":2740},{},[2741],{"type":33,"value":2742},"把流式响应与实时搜索讲透，可以收敛成 5 句话：",{"type":27,"tag":76,"props":2744,"children":2745},{},[2746,2754,2769,2777,2785],{"type":27,"tag":55,"props":2747,"children":2748},{},[2749],{"type":27,"tag":235,"props":2750,"children":2751},{},[2752],{"type":33,"value":2753},"流式体验优化的不是模型速度，而是用户对等待的感知。",{"type":27,"tag":55,"props":2755,"children":2756},{},[2757],{"type":27,"tag":235,"props":2758,"children":2759},{},[2760,2762,2767],{"type":33,"value":2761},"AI 文本增量输出场景里，",{"type":27,"tag":146,"props":2763,"children":2765},{"className":2764},[],[2766],{"type":33,"value":14},{"type":33,"value":2768}," 往往是最实用的默认选项。",{"type":27,"tag":55,"props":2770,"children":2771},{},[2772],{"type":27,"tag":235,"props":2773,"children":2774},{},[2775],{"type":33,"value":2776},"取消控制、请求废弃和分层结果，比“能流出来”更重要。",{"type":27,"tag":55,"props":2778,"children":2779},{},[2780],{"type":27,"tag":235,"props":2781,"children":2782},{},[2783],{"type":33,"value":2784},"实时搜索不是每次按键都去重算一遍，而是要有节制地给反馈。",{"type":27,"tag":55,"props":2786,"children":2787},{},[2788],{"type":27,"tag":235,"props":2789,"children":2790},{},[2791],{"type":33,"value":2792},"真正成熟的 AI Web 体验，一定同时兼顾速度感、稳定性和成本。",{"type":27,"tag":35,"props":2794,"children":2795},{},[2796],{"type":33,"value":2797},"如果你只记住一句话，我希望是这一句：",{"type":27,"tag":216,"props":2799,"children":2800},{},[2801],{"type":27,"tag":35,"props":2802,"children":2803},{},[2804],{"type":33,"value":2805},"好的流式体验，不是让内容一字一字冒出来，而是让用户始终知道系统正在往正确方向前进。",{"type":27,"tag":35,"props":2807,"children":2808},{},[2809],{"type":33,"value":2810},"否则产品最后给人的感觉，往往不是“实时”，而是——",{"type":27,"tag":35,"props":2812,"children":2813},{},[2814],{"type":27,"tag":235,"props":2815,"children":2816},{},[2817],{"type":33,"value":2818},"一直在忙，但不知道在忙什么。",{"type":27,"tag":2820,"props":2821,"children":2822},"style",{},[2823],{"type":33,"value":2824},"html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}",{"title":7,"searchDepth":781,"depth":781,"links":2826},[2827,2831,2840,2850,2855,2859,2868,2871,2875,2880,2886],{"id":206,"depth":743,"text":209,"children":2828},[2829,2830],{"id":245,"depth":781,"text":248},{"id":289,"depth":781,"text":292},{"id":331,"depth":743,"text":2832,"children":2833},"二、三种常见实时传输方式，别一上来就默认 WebSocket",[2834,2836,2838,2839],{"id":364,"depth":781,"text":2835},"2.1 SSE：最适合单向增量输出",{"id":438,"depth":781,"text":2837},"2.2 WebSocket：适合双向高频互动",{"id":498,"depth":781,"text":501},{"id":550,"depth":781,"text":553},{"id":652,"depth":743,"text":2841,"children":2842},"三、SSE 落地时，真正难的不是写出来，而是把整条链路打通",[2843,2845],{"id":665,"depth":781,"text":2844},"3.1 一个最小 SSE 示例",{"id":1100,"depth":781,"text":1103,"children":2846},[2847,2848,2849],{"id":1107,"depth":20,"text":1110},{"id":1118,"depth":20,"text":1121},{"id":1129,"depth":20,"text":1132},{"id":1143,"depth":743,"text":1146,"children":2851},[2852,2853,2854],{"id":1177,"depth":781,"text":1180},{"id":1206,"depth":781,"text":1209},{"id":1499,"depth":781,"text":1502},{"id":1531,"depth":743,"text":1534,"children":2856},[2857,2858],{"id":1598,"depth":781,"text":1601},{"id":1778,"depth":781,"text":1781},{"id":1797,"depth":743,"text":1800,"children":2860},[2861,2862,2867],{"id":1836,"depth":781,"text":1839},{"id":1913,"depth":781,"text":1916,"children":2863},[2864,2865,2866],{"id":1919,"depth":20,"text":1922},{"id":1930,"depth":20,"text":1933},{"id":1941,"depth":20,"text":1944},{"id":1952,"depth":781,"text":1955},{"id":2175,"depth":743,"text":2178,"children":2869},[2870],{"id":2214,"depth":781,"text":2217},{"id":2259,"depth":743,"text":2262,"children":2872},[2873,2874],{"id":2303,"depth":781,"text":2306},{"id":2409,"depth":781,"text":2412},{"id":2451,"depth":743,"text":2454,"children":2876},[2877,2878,2879],{"id":2457,"depth":781,"text":2460},{"id":2509,"depth":781,"text":2512},{"id":2520,"depth":781,"text":2523},{"id":2557,"depth":743,"text":2560,"children":2881},[2882,2883,2884,2885],{"id":2563,"depth":781,"text":2563},{"id":2615,"depth":781,"text":2615},{"id":2660,"depth":781,"text":2660},{"id":2696,"depth":781,"text":2696},{"id":2735,"depth":743,"text":2735},"markdown","content:topics:engineering:streaming-response-realtime-search-guide.md","content","topics/engineering/streaming-response-realtime-search-guide.md","topics/engineering/streaming-response-realtime-search-guide","md",1777109948253]