[{"data":1,"prerenderedAt":3695},["ShallowReactive",2],{"content-/topics/engineering/llm-api-integration-complete-guide":3},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":7,"title":8,"description":9,"date":10,"category":5,"tags":11,"author":17,"featured":18,"series":19,"seriesOrder":20,"readingTime":21,"image":22,"body":23,"_type":3690,"_id":3691,"_source":958,"_file":3692,"_stem":3693,"_extension":3694},"/topics/engineering/llm-api-integration-complete-guide","engineering",false,"","LLM API 集成完全指南：从模型选型到流式输出的工程实践","把大模型接进产品，不是调通一个 SDK 就完了。本文从真实工程视角讲清模型选型、请求链路、重试与超时、流式响应、结构化输出、提示模板、观测与降本策略，帮你把 LLM API 集成做成稳定能力。","2026-03-08",[12,13,14,15,16],"LLM","AI集成","API集成","流式响应","工程实践","小明",true,"ai-integration-and-intelligent-applications",1,18,"/images/articles/llm-api-integration-complete-guide-cover.jpg",{"type":24,"children":25,"toc":3633},"root",[26,34,40,65,70,75,109,114,123,128,133,161,166,170,177,182,200,205,217,222,245,250,257,262,280,285,303,309,314,444,449,452,458,464,581,587,592,610,615,621,710,715,718,724,729,734,762,767,773,778,815,821,1433,1438,1441,1447,1452,1457,1470,1476,1565,1571,1576,1581,1599,1604,1610,1615,1620,1643,1655,1658,1664,1669,1692,1697,1703,1885,1891,1896,1901,2168,2173,2191,2194,2200,2205,2211,2222,2227,2240,2246,2602,2608,2626,2631,2634,2640,2645,2650,2673,2679,2684,2702,2707,2713,2786,2792,3001,3006,3009,3015,3020,3025,3031,3144,3150,3155,3159,3182,3187,3190,3196,3202,3207,3225,3231,3236,3241,3259,3265,3270,3298,3303,3306,3312,3318,3322,3340,3345,3351,3356,3362,3367,3373,3378,3384,3389,3392,3398,3403,3437,3442,3473,3478,3509,3514,3545,3548,3553,3558,3601,3606,3614,3619,3627],{"type":27,"tag":28,"props":29,"children":31},"element","h1",{"id":30},"llm-api-集成完全指南从模型选型到流式输出的工程实践",[32],{"type":33,"value":8},"text",{"type":27,"tag":35,"props":36,"children":37},"p",{},[38],{"type":33,"value":39},"很多团队第一次做 AI 功能，路径都差不多：",{"type":27,"tag":41,"props":42,"children":43},"ol",{},[44,50,55,60],{"type":27,"tag":45,"props":46,"children":47},"li",{},[48],{"type":33,"value":49},"申请一个模型 API Key",{"type":27,"tag":45,"props":51,"children":52},{},[53],{"type":33,"value":54},"跑通一个 demo",{"type":27,"tag":45,"props":56,"children":57},{},[58],{"type":33,"value":59},"聊天框里成功返回一句话",{"type":27,"tag":45,"props":61,"children":62},{},[63],{"type":33,"value":64},"兴奋地说：“这不就接上了吗？”",{"type":27,"tag":35,"props":66,"children":67},{},[68],{"type":33,"value":69},"真正上线以后，事情才开始变复杂。",{"type":27,"tag":35,"props":71,"children":72},{},[73],{"type":33,"value":74},"你会很快遇到这些问题：",{"type":27,"tag":76,"props":77,"children":78},"ul",{},[79,84,89,94,99,104],{"type":27,"tag":45,"props":80,"children":81},{},[82],{"type":33,"value":83},"模型偶尔很聪明，偶尔非常跑偏",{"type":27,"tag":45,"props":85,"children":86},{},[87],{"type":33,"value":88},"响应时延忽高忽低",{"type":27,"tag":45,"props":90,"children":91},{},[92],{"type":33,"value":93},"成本比预期高很多",{"type":27,"tag":45,"props":95,"children":96},{},[97],{"type":33,"value":98},"流式输出在前端体验上很丝滑，但服务端链路并不稳定",{"type":27,"tag":45,"props":100,"children":101},{},[102],{"type":33,"value":103},"同一个提示词，换个模型结果差一截",{"type":27,"tag":45,"props":105,"children":106},{},[107],{"type":33,"value":108},"明明只是“让 AI 帮我总结一下”，结果工程上要补一整套重试、限流、观测、fallback",{"type":27,"tag":35,"props":110,"children":111},{},[112],{"type":33,"value":113},"这时候团队才会意识到：",{"type":27,"tag":115,"props":116,"children":117},"blockquote",{},[118],{"type":27,"tag":35,"props":119,"children":120},{},[121],{"type":33,"value":122},"把 LLM 接进产品，真正难的不是 first call，而是把它做成一个可运营、可观测、可迭代的能力。",{"type":27,"tag":35,"props":124,"children":125},{},[126],{"type":33,"value":127},"这篇文章就从这个角度来讲。",{"type":27,"tag":35,"props":129,"children":130},{},[131],{"type":33,"value":132},"不只是“怎么调用接口”，而是讲清楚：",{"type":27,"tag":41,"props":134,"children":135},{},[136,141,146,151,156],{"type":27,"tag":45,"props":137,"children":138},{},[139],{"type":33,"value":140},"模型怎么选，为什么不能只看排行榜",{"type":27,"tag":45,"props":142,"children":143},{},[144],{"type":33,"value":145},"服务端如何封装统一 LLM Gateway",{"type":27,"tag":45,"props":147,"children":148},{},[149],{"type":33,"value":150},"流式输出、结构化输出、提示模板该怎么落地",{"type":27,"tag":45,"props":152,"children":153},{},[154],{"type":33,"value":155},"如何处理超时、重试、fallback、速率限制",{"type":27,"tag":45,"props":157,"children":158},{},[159],{"type":33,"value":160},"怎样把一次“能跑通”变成“能上线”的工程系统",{"type":27,"tag":35,"props":162,"children":163},{},[164],{"type":33,"value":165},"如果你准备把 AI 做进产品，这篇适合作为第一篇工程底稿。",{"type":27,"tag":167,"props":168,"children":169},"hr",{},[],{"type":27,"tag":171,"props":172,"children":174},"h2",{"id":173},"一先别急着写代码你接的是能力不是模型",[175],{"type":33,"value":176},"一、先别急着写代码：你接的是“能力”，不是“模型”",{"type":27,"tag":35,"props":178,"children":179},{},[180],{"type":33,"value":181},"很多团队集成 LLM 时，最先纠结的是：",{"type":27,"tag":76,"props":183,"children":184},{},[185,190,195],{"type":27,"tag":45,"props":186,"children":187},{},[188],{"type":33,"value":189},"选 OpenAI 还是国产模型",{"type":27,"tag":45,"props":191,"children":192},{},[193],{"type":33,"value":194},"选大模型还是轻量模型",{"type":27,"tag":45,"props":196,"children":197},{},[198],{"type":33,"value":199},"选通用模型还是推理模型",{"type":27,"tag":35,"props":201,"children":202},{},[203],{"type":33,"value":204},"这些问题都重要，但更本质的一层是：",{"type":27,"tag":115,"props":206,"children":207},{},[208],{"type":27,"tag":35,"props":209,"children":210},{},[211],{"type":27,"tag":212,"props":213,"children":214},"strong",{},[215],{"type":33,"value":216},"你到底想把哪种 AI 能力接进产品？",{"type":27,"tag":35,"props":218,"children":219},{},[220],{"type":33,"value":221},"因为“LLM API 集成”不是一个需求，而是一类需求的总称。常见至少有四类：",{"type":27,"tag":41,"props":223,"children":224},{},[225,230,235,240],{"type":27,"tag":45,"props":226,"children":227},{},[228],{"type":33,"value":229},"聊天对话",{"type":27,"tag":45,"props":231,"children":232},{},[233],{"type":33,"value":234},"文本生成 / 总结 / 改写",{"type":27,"tag":45,"props":236,"children":237},{},[238],{"type":33,"value":239},"结构化抽取",{"type":27,"tag":45,"props":241,"children":242},{},[243],{"type":33,"value":244},"Agent / 工具调用",{"type":27,"tag":35,"props":246,"children":247},{},[248],{"type":33,"value":249},"这四类需求，对模型的要求完全不同。",{"type":27,"tag":251,"props":252,"children":254},"h3",{"id":253},"_11-一个常见误区直接拿最强模型打所有场景",[255],{"type":33,"value":256},"1.1 一个常见误区：直接拿最强模型打所有场景",{"type":27,"tag":35,"props":258,"children":259},{},[260],{"type":33,"value":261},"这会带来三个问题：",{"type":27,"tag":76,"props":263,"children":264},{},[265,270,275],{"type":27,"tag":45,"props":266,"children":267},{},[268],{"type":33,"value":269},"成本高",{"type":27,"tag":45,"props":271,"children":272},{},[273],{"type":33,"value":274},"延迟高",{"type":27,"tag":45,"props":276,"children":277},{},[278],{"type":33,"value":279},"稳定性不一定最优",{"type":27,"tag":35,"props":281,"children":282},{},[283],{"type":33,"value":284},"比如：",{"type":27,"tag":76,"props":286,"children":287},{},[288,293,298],{"type":27,"tag":45,"props":289,"children":290},{},[291],{"type":33,"value":292},"简单分类和抽取，用轻量模型就够",{"type":27,"tag":45,"props":294,"children":295},{},[296],{"type":33,"value":297},"复杂长上下文推理，才值得上更强模型",{"type":27,"tag":45,"props":299,"children":300},{},[301],{"type":33,"value":302},"实时聊天对延迟敏感，可能优先选响应更快的模型",{"type":27,"tag":251,"props":304,"children":306},{"id":305},"_12-先画能力矩阵再选模型",[307],{"type":33,"value":308},"1.2 先画能力矩阵，再选模型",{"type":27,"tag":35,"props":310,"children":311},{},[312],{"type":33,"value":313},"一个更靠谱的做法是先列矩阵：",{"type":27,"tag":315,"props":316,"children":317},"table",{},[318,347],{"type":27,"tag":319,"props":320,"children":321},"thead",{},[322],{"type":27,"tag":323,"props":324,"children":325},"tr",{},[326,332,337,342],{"type":27,"tag":327,"props":328,"children":329},"th",{},[330],{"type":33,"value":331},"场景",{"type":27,"tag":327,"props":333,"children":334},{},[335],{"type":33,"value":336},"目标",{"type":27,"tag":327,"props":338,"children":339},{},[340],{"type":33,"value":341},"关键指标",{"type":27,"tag":327,"props":343,"children":344},{},[345],{"type":33,"value":346},"模型要求",{"type":27,"tag":348,"props":349,"children":350},"tbody",{},[351,375,398,421],{"type":27,"tag":323,"props":352,"children":353},{},[354,360,365,370],{"type":27,"tag":355,"props":356,"children":357},"td",{},[358],{"type":33,"value":359},"AI 客服初答",{"type":27,"tag":355,"props":361,"children":362},{},[363],{"type":33,"value":364},"快速、稳定",{"type":27,"tag":355,"props":366,"children":367},{},[368],{"type":33,"value":369},"首 token 延迟、成本",{"type":27,"tag":355,"props":371,"children":372},{},[373],{"type":33,"value":374},"速度优先",{"type":27,"tag":323,"props":376,"children":377},{},[378,383,388,393],{"type":27,"tag":355,"props":379,"children":380},{},[381],{"type":33,"value":382},"文档总结",{"type":27,"tag":355,"props":384,"children":385},{},[386],{"type":33,"value":387},"准确、结构清晰",{"type":27,"tag":355,"props":389,"children":390},{},[391],{"type":33,"value":392},"结果质量、长度控制",{"type":27,"tag":355,"props":394,"children":395},{},[396],{"type":33,"value":397},"指令遵循优先",{"type":27,"tag":323,"props":399,"children":400},{},[401,406,411,416],{"type":27,"tag":355,"props":402,"children":403},{},[404],{"type":33,"value":405},"信息抽取",{"type":27,"tag":355,"props":407,"children":408},{},[409],{"type":33,"value":410},"稳定 JSON",{"type":27,"tag":355,"props":412,"children":413},{},[414],{"type":33,"value":415},"结构正确率",{"type":27,"tag":355,"props":417,"children":418},{},[419],{"type":33,"value":420},"结构化能力优先",{"type":27,"tag":323,"props":422,"children":423},{},[424,429,434,439],{"type":27,"tag":355,"props":425,"children":426},{},[427],{"type":33,"value":428},"复杂问答",{"type":27,"tag":355,"props":430,"children":431},{},[432],{"type":33,"value":433},"推理完整",{"type":27,"tag":355,"props":435,"children":436},{},[437],{"type":33,"value":438},"正确率、上下文能力",{"type":27,"tag":355,"props":440,"children":441},{},[442],{"type":33,"value":443},"推理能力优先",{"type":27,"tag":35,"props":445,"children":446},{},[447],{"type":33,"value":448},"先回答“要什么能力”，再谈“用哪个模型”。",{"type":27,"tag":167,"props":450,"children":451},{},[],{"type":27,"tag":171,"props":453,"children":455},{"id":454},"二模型选型排行榜重要但不是决策终点",[456],{"type":33,"value":457},"二、模型选型：排行榜重要，但不是决策终点",{"type":27,"tag":251,"props":459,"children":461},{"id":460},"_21-选型时至少要看五个维度",[462],{"type":33,"value":463},"2.1 选型时至少要看五个维度",{"type":27,"tag":315,"props":465,"children":466},{},[467,488],{"type":27,"tag":319,"props":468,"children":469},{},[470],{"type":27,"tag":323,"props":471,"children":472},{},[473,478,483],{"type":27,"tag":327,"props":474,"children":475},{},[476],{"type":33,"value":477},"维度",{"type":27,"tag":327,"props":479,"children":480},{},[481],{"type":33,"value":482},"为什么重要",{"type":27,"tag":327,"props":484,"children":485},{},[486],{"type":33,"value":487},"例子",{"type":27,"tag":348,"props":489,"children":490},{},[491,509,527,545,563],{"type":27,"tag":323,"props":492,"children":493},{},[494,499,504],{"type":27,"tag":355,"props":495,"children":496},{},[497],{"type":33,"value":498},"质量",{"type":27,"tag":355,"props":500,"children":501},{},[502],{"type":33,"value":503},"决定结果是否可用",{"type":27,"tag":355,"props":505,"children":506},{},[507],{"type":33,"value":508},"回答准确率、幻觉率",{"type":27,"tag":323,"props":510,"children":511},{},[512,517,522],{"type":27,"tag":355,"props":513,"children":514},{},[515],{"type":33,"value":516},"时延",{"type":27,"tag":355,"props":518,"children":519},{},[520],{"type":33,"value":521},"决定产品体验",{"type":27,"tag":355,"props":523,"children":524},{},[525],{"type":33,"value":526},"首 token 时间、总耗时",{"type":27,"tag":323,"props":528,"children":529},{},[530,535,540],{"type":27,"tag":355,"props":531,"children":532},{},[533],{"type":33,"value":534},"成本",{"type":27,"tag":355,"props":536,"children":537},{},[538],{"type":33,"value":539},"决定是否能规模化",{"type":27,"tag":355,"props":541,"children":542},{},[543],{"type":33,"value":544},"输入输出 token 单价",{"type":27,"tag":323,"props":546,"children":547},{},[548,553,558],{"type":27,"tag":355,"props":549,"children":550},{},[551],{"type":33,"value":552},"稳定性",{"type":27,"tag":355,"props":554,"children":555},{},[556],{"type":33,"value":557},"决定能否上线",{"type":27,"tag":355,"props":559,"children":560},{},[561],{"type":33,"value":562},"限流、超时、成功率",{"type":27,"tag":323,"props":564,"children":565},{},[566,571,576],{"type":27,"tag":355,"props":567,"children":568},{},[569],{"type":33,"value":570},"能力特性",{"type":27,"tag":355,"props":572,"children":573},{},[574],{"type":33,"value":575},"决定能否满足场景",{"type":27,"tag":355,"props":577,"children":578},{},[579],{"type":33,"value":580},"function calling、JSON mode、长上下文",{"type":27,"tag":251,"props":582,"children":584},{"id":583},"_22-不同场景适合不同模型层级",[585],{"type":33,"value":586},"2.2 不同场景适合不同模型层级",{"type":27,"tag":35,"props":588,"children":589},{},[590],{"type":33,"value":591},"很多成熟产品会形成一个模型分层：",{"type":27,"tag":76,"props":593,"children":594},{},[595,600,605],{"type":27,"tag":45,"props":596,"children":597},{},[598],{"type":33,"value":599},"默认模型：覆盖 80% 常规请求",{"type":27,"tag":45,"props":601,"children":602},{},[603],{"type":33,"value":604},"高质量模型：用于复杂任务或高价值用户",{"type":27,"tag":45,"props":606,"children":607},{},[608],{"type":33,"value":609},"兜底模型：在主模型异常时保证可用性",{"type":27,"tag":35,"props":611,"children":612},{},[613],{"type":33,"value":614},"这比“All in 一个最强模型”更稳，也更省钱。",{"type":27,"tag":251,"props":616,"children":618},{"id":617},"_23-一个简单的选型策略",[619],{"type":33,"value":620},"2.3 一个简单的选型策略",{"type":27,"tag":622,"props":623,"children":626},"pre",{"className":624,"code":625,"language":33,"meta":7,"style":7},"language-text shiki shiki-themes github-dark","先定义业务场景\n  ↓\n用同一套评测样本比质量\n  ↓\n测真实首 token 和总响应时延\n  ↓\n估算单位请求成本\n  ↓\n再决定默认模型 + fallback 模型\n",[627],{"type":27,"tag":628,"props":629,"children":630},"code",{"__ignoreMap":7},[631,641,650,659,667,676,684,693,701],{"type":27,"tag":632,"props":633,"children":635},"span",{"class":634,"line":20},"line",[636],{"type":27,"tag":632,"props":637,"children":638},{},[639],{"type":33,"value":640},"先定义业务场景\n",{"type":27,"tag":632,"props":642,"children":644},{"class":634,"line":643},2,[645],{"type":27,"tag":632,"props":646,"children":647},{},[648],{"type":33,"value":649},"  ↓\n",{"type":27,"tag":632,"props":651,"children":653},{"class":634,"line":652},3,[654],{"type":27,"tag":632,"props":655,"children":656},{},[657],{"type":33,"value":658},"用同一套评测样本比质量\n",{"type":27,"tag":632,"props":660,"children":662},{"class":634,"line":661},4,[663],{"type":27,"tag":632,"props":664,"children":665},{},[666],{"type":33,"value":649},{"type":27,"tag":632,"props":668,"children":670},{"class":634,"line":669},5,[671],{"type":27,"tag":632,"props":672,"children":673},{},[674],{"type":33,"value":675},"测真实首 token 和总响应时延\n",{"type":27,"tag":632,"props":677,"children":679},{"class":634,"line":678},6,[680],{"type":27,"tag":632,"props":681,"children":682},{},[683],{"type":33,"value":649},{"type":27,"tag":632,"props":685,"children":687},{"class":634,"line":686},7,[688],{"type":27,"tag":632,"props":689,"children":690},{},[691],{"type":33,"value":692},"估算单位请求成本\n",{"type":27,"tag":632,"props":694,"children":696},{"class":634,"line":695},8,[697],{"type":27,"tag":632,"props":698,"children":699},{},[700],{"type":33,"value":649},{"type":27,"tag":632,"props":702,"children":704},{"class":634,"line":703},9,[705],{"type":27,"tag":632,"props":706,"children":707},{},[708],{"type":33,"value":709},"再决定默认模型 + fallback 模型\n",{"type":27,"tag":35,"props":711,"children":712},{},[713],{"type":33,"value":714},"如果没有自己的评测集，只看公开 benchmark，很容易选到“比赛成绩好、实际产品体验一般”的模型。",{"type":27,"tag":167,"props":716,"children":717},{},[],{"type":27,"tag":171,"props":719,"children":721},{"id":720},"三统一-llm-gateway不要让业务代码到处直接调模型",[722],{"type":33,"value":723},"三、统一 LLM Gateway：不要让业务代码到处直接调模型",{"type":27,"tag":35,"props":725,"children":726},{},[727],{"type":33,"value":728},"这是 AI 集成里非常重要的一步。",{"type":27,"tag":35,"props":730,"children":731},{},[732],{"type":33,"value":733},"如果每个业务团队都直接在代码里写：",{"type":27,"tag":76,"props":735,"children":736},{},[737,742,747,752,757],{"type":27,"tag":45,"props":738,"children":739},{},[740],{"type":33,"value":741},"provider SDK 调用",{"type":27,"tag":45,"props":743,"children":744},{},[745],{"type":33,"value":746},"prompt 拼接",{"type":27,"tag":45,"props":748,"children":749},{},[750],{"type":33,"value":751},"token 统计",{"type":27,"tag":45,"props":753,"children":754},{},[755],{"type":33,"value":756},"重试和超时",{"type":27,"tag":45,"props":758,"children":759},{},[760],{"type":33,"value":761},"日志埋点",{"type":27,"tag":35,"props":763,"children":764},{},[765],{"type":33,"value":766},"那么三个月后，整个仓库会充满“各自写了一套”的 AI 调用逻辑。",{"type":27,"tag":251,"props":768,"children":770},{"id":769},"_31-为什么需要统一网关层",[771],{"type":33,"value":772},"3.1 为什么需要统一网关层",{"type":27,"tag":35,"props":774,"children":775},{},[776],{"type":33,"value":777},"统一 LLM Gateway 可以收口：",{"type":27,"tag":76,"props":779,"children":780},{},[781,786,791,796,801,805,810],{"type":27,"tag":45,"props":782,"children":783},{},[784],{"type":33,"value":785},"模型路由",{"type":27,"tag":45,"props":787,"children":788},{},[789],{"type":33,"value":790},"密钥管理",{"type":27,"tag":45,"props":792,"children":793},{},[794],{"type":33,"value":795},"超时与重试",{"type":27,"tag":45,"props":797,"children":798},{},[799],{"type":33,"value":800},"流式响应封装",{"type":27,"tag":45,"props":802,"children":803},{},[804],{"type":33,"value":751},{"type":27,"tag":45,"props":806,"children":807},{},[808],{"type":33,"value":809},"结果审计与观测",{"type":27,"tag":45,"props":811,"children":812},{},[813],{"type":33,"value":814},"fallback 策略",{"type":27,"tag":251,"props":816,"children":818},{"id":817},"_32-一个简化示例",[819],{"type":33,"value":820},"3.2 一个简化示例",{"type":27,"tag":622,"props":822,"children":826},{"className":823,"code":824,"language":825,"meta":7,"style":7},"language-ts shiki shiki-themes github-dark","type LLMRequest = {\n  task: 'chat' | 'summary' | 'extract'\n  messages: Array\u003C{ role: 'system' | 'user' | 'assistant'; content: string }>\n  stream?: boolean\n  temperature?: number\n  responseFormat?: 'text' | 'json'\n}\n\ntype LLMResponse = {\n  content: string\n  model: string\n  usage?: {\n    promptTokens: number\n    completionTokens: number\n  }\n}\n\nexport async function callLLM(input: LLMRequest): Promise\u003CLLMResponse> {\n  const provider = routeModel(input)\n  const timeoutMs = input.task === 'chat' ? 12_000 : 20_000\n\n  return withRetry(\n    () => provider.generate(input, timeoutMs),\n    { retries: 2, backoffMs: 500 },\n  )\n}\n","ts",[827],{"type":27,"tag":628,"props":828,"children":829},{"__ignoreMap":7},[830,856,895,974,992,1009,1035,1043,1051,1071,1089,1106,1123,1140,1157,1166,1174,1182,1252,1280,1331,1339,1358,1387,1416,1425],{"type":27,"tag":632,"props":831,"children":832},{"class":634,"line":20},[833,839,845,850],{"type":27,"tag":632,"props":834,"children":836},{"style":835},"--shiki-default:#F97583",[837],{"type":33,"value":838},"type",{"type":27,"tag":632,"props":840,"children":842},{"style":841},"--shiki-default:#B392F0",[843],{"type":33,"value":844}," LLMRequest",{"type":27,"tag":632,"props":846,"children":847},{"style":835},[848],{"type":33,"value":849}," =",{"type":27,"tag":632,"props":851,"children":853},{"style":852},"--shiki-default:#E1E4E8",[854],{"type":33,"value":855}," {\n",{"type":27,"tag":632,"props":857,"children":858},{"class":634,"line":643},[859,865,870,876,881,886,890],{"type":27,"tag":632,"props":860,"children":862},{"style":861},"--shiki-default:#FFAB70",[863],{"type":33,"value":864},"  task",{"type":27,"tag":632,"props":866,"children":867},{"style":835},[868],{"type":33,"value":869},":",{"type":27,"tag":632,"props":871,"children":873},{"style":872},"--shiki-default:#9ECBFF",[874],{"type":33,"value":875}," 'chat'",{"type":27,"tag":632,"props":877,"children":878},{"style":835},[879],{"type":33,"value":880}," |",{"type":27,"tag":632,"props":882,"children":883},{"style":872},[884],{"type":33,"value":885}," 'summary'",{"type":27,"tag":632,"props":887,"children":888},{"style":835},[889],{"type":33,"value":880},{"type":27,"tag":632,"props":891,"children":892},{"style":872},[893],{"type":33,"value":894}," 'extract'\n",{"type":27,"tag":632,"props":896,"children":897},{"class":634,"line":652},[898,903,907,912,917,922,926,931,935,940,944,949,954,959,963,969],{"type":27,"tag":632,"props":899,"children":900},{"style":861},[901],{"type":33,"value":902},"  messages",{"type":27,"tag":632,"props":904,"children":905},{"style":835},[906],{"type":33,"value":869},{"type":27,"tag":632,"props":908,"children":909},{"style":841},[910],{"type":33,"value":911}," Array",{"type":27,"tag":632,"props":913,"children":914},{"style":852},[915],{"type":33,"value":916},"\u003C{ ",{"type":27,"tag":632,"props":918,"children":919},{"style":861},[920],{"type":33,"value":921},"role",{"type":27,"tag":632,"props":923,"children":924},{"style":835},[925],{"type":33,"value":869},{"type":27,"tag":632,"props":927,"children":928},{"style":872},[929],{"type":33,"value":930}," 'system'",{"type":27,"tag":632,"props":932,"children":933},{"style":835},[934],{"type":33,"value":880},{"type":27,"tag":632,"props":936,"children":937},{"style":872},[938],{"type":33,"value":939}," 'user'",{"type":27,"tag":632,"props":941,"children":942},{"style":835},[943],{"type":33,"value":880},{"type":27,"tag":632,"props":945,"children":946},{"style":872},[947],{"type":33,"value":948}," 'assistant'",{"type":27,"tag":632,"props":950,"children":951},{"style":852},[952],{"type":33,"value":953},"; ",{"type":27,"tag":632,"props":955,"children":956},{"style":861},[957],{"type":33,"value":958},"content",{"type":27,"tag":632,"props":960,"children":961},{"style":835},[962],{"type":33,"value":869},{"type":27,"tag":632,"props":964,"children":966},{"style":965},"--shiki-default:#79B8FF",[967],{"type":33,"value":968}," string",{"type":27,"tag":632,"props":970,"children":971},{"style":852},[972],{"type":33,"value":973}," }>\n",{"type":27,"tag":632,"props":975,"children":976},{"class":634,"line":661},[977,982,987],{"type":27,"tag":632,"props":978,"children":979},{"style":861},[980],{"type":33,"value":981},"  stream",{"type":27,"tag":632,"props":983,"children":984},{"style":835},[985],{"type":33,"value":986},"?:",{"type":27,"tag":632,"props":988,"children":989},{"style":965},[990],{"type":33,"value":991}," boolean\n",{"type":27,"tag":632,"props":993,"children":994},{"class":634,"line":669},[995,1000,1004],{"type":27,"tag":632,"props":996,"children":997},{"style":861},[998],{"type":33,"value":999},"  temperature",{"type":27,"tag":632,"props":1001,"children":1002},{"style":835},[1003],{"type":33,"value":986},{"type":27,"tag":632,"props":1005,"children":1006},{"style":965},[1007],{"type":33,"value":1008}," number\n",{"type":27,"tag":632,"props":1010,"children":1011},{"class":634,"line":678},[1012,1017,1021,1026,1030],{"type":27,"tag":632,"props":1013,"children":1014},{"style":861},[1015],{"type":33,"value":1016},"  responseFormat",{"type":27,"tag":632,"props":1018,"children":1019},{"style":835},[1020],{"type":33,"value":986},{"type":27,"tag":632,"props":1022,"children":1023},{"style":872},[1024],{"type":33,"value":1025}," 'text'",{"type":27,"tag":632,"props":1027,"children":1028},{"style":835},[1029],{"type":33,"value":880},{"type":27,"tag":632,"props":1031,"children":1032},{"style":872},[1033],{"type":33,"value":1034}," 'json'\n",{"type":27,"tag":632,"props":1036,"children":1037},{"class":634,"line":686},[1038],{"type":27,"tag":632,"props":1039,"children":1040},{"style":852},[1041],{"type":33,"value":1042},"}\n",{"type":27,"tag":632,"props":1044,"children":1045},{"class":634,"line":695},[1046],{"type":27,"tag":632,"props":1047,"children":1048},{"emptyLinePlaceholder":18},[1049],{"type":33,"value":1050},"\n",{"type":27,"tag":632,"props":1052,"children":1053},{"class":634,"line":703},[1054,1058,1063,1067],{"type":27,"tag":632,"props":1055,"children":1056},{"style":835},[1057],{"type":33,"value":838},{"type":27,"tag":632,"props":1059,"children":1060},{"style":841},[1061],{"type":33,"value":1062}," LLMResponse",{"type":27,"tag":632,"props":1064,"children":1065},{"style":835},[1066],{"type":33,"value":849},{"type":27,"tag":632,"props":1068,"children":1069},{"style":852},[1070],{"type":33,"value":855},{"type":27,"tag":632,"props":1072,"children":1074},{"class":634,"line":1073},10,[1075,1080,1084],{"type":27,"tag":632,"props":1076,"children":1077},{"style":861},[1078],{"type":33,"value":1079},"  content",{"type":27,"tag":632,"props":1081,"children":1082},{"style":835},[1083],{"type":33,"value":869},{"type":27,"tag":632,"props":1085,"children":1086},{"style":965},[1087],{"type":33,"value":1088}," string\n",{"type":27,"tag":632,"props":1090,"children":1092},{"class":634,"line":1091},11,[1093,1098,1102],{"type":27,"tag":632,"props":1094,"children":1095},{"style":861},[1096],{"type":33,"value":1097},"  model",{"type":27,"tag":632,"props":1099,"children":1100},{"style":835},[1101],{"type":33,"value":869},{"type":27,"tag":632,"props":1103,"children":1104},{"style":965},[1105],{"type":33,"value":1088},{"type":27,"tag":632,"props":1107,"children":1109},{"class":634,"line":1108},12,[1110,1115,1119],{"type":27,"tag":632,"props":1111,"children":1112},{"style":861},[1113],{"type":33,"value":1114},"  usage",{"type":27,"tag":632,"props":1116,"children":1117},{"style":835},[1118],{"type":33,"value":986},{"type":27,"tag":632,"props":1120,"children":1121},{"style":852},[1122],{"type":33,"value":855},{"type":27,"tag":632,"props":1124,"children":1126},{"class":634,"line":1125},13,[1127,1132,1136],{"type":27,"tag":632,"props":1128,"children":1129},{"style":861},[1130],{"type":33,"value":1131},"    promptTokens",{"type":27,"tag":632,"props":1133,"children":1134},{"style":835},[1135],{"type":33,"value":869},{"type":27,"tag":632,"props":1137,"children":1138},{"style":965},[1139],{"type":33,"value":1008},{"type":27,"tag":632,"props":1141,"children":1143},{"class":634,"line":1142},14,[1144,1149,1153],{"type":27,"tag":632,"props":1145,"children":1146},{"style":861},[1147],{"type":33,"value":1148},"    completionTokens",{"type":27,"tag":632,"props":1150,"children":1151},{"style":835},[1152],{"type":33,"value":869},{"type":27,"tag":632,"props":1154,"children":1155},{"style":965},[1156],{"type":33,"value":1008},{"type":27,"tag":632,"props":1158,"children":1160},{"class":634,"line":1159},15,[1161],{"type":27,"tag":632,"props":1162,"children":1163},{"style":852},[1164],{"type":33,"value":1165},"  }\n",{"type":27,"tag":632,"props":1167,"children":1169},{"class":634,"line":1168},16,[1170],{"type":27,"tag":632,"props":1171,"children":1172},{"style":852},[1173],{"type":33,"value":1042},{"type":27,"tag":632,"props":1175,"children":1177},{"class":634,"line":1176},17,[1178],{"type":27,"tag":632,"props":1179,"children":1180},{"emptyLinePlaceholder":18},[1181],{"type":33,"value":1050},{"type":27,"tag":632,"props":1183,"children":1184},{"class":634,"line":21},[1185,1190,1195,1200,1205,1210,1215,1219,1223,1228,1232,1237,1242,1247],{"type":27,"tag":632,"props":1186,"children":1187},{"style":835},[1188],{"type":33,"value":1189},"export",{"type":27,"tag":632,"props":1191,"children":1192},{"style":835},[1193],{"type":33,"value":1194}," async",{"type":27,"tag":632,"props":1196,"children":1197},{"style":835},[1198],{"type":33,"value":1199}," function",{"type":27,"tag":632,"props":1201,"children":1202},{"style":841},[1203],{"type":33,"value":1204}," callLLM",{"type":27,"tag":632,"props":1206,"children":1207},{"style":852},[1208],{"type":33,"value":1209},"(",{"type":27,"tag":632,"props":1211,"children":1212},{"style":861},[1213],{"type":33,"value":1214},"input",{"type":27,"tag":632,"props":1216,"children":1217},{"style":835},[1218],{"type":33,"value":869},{"type":27,"tag":632,"props":1220,"children":1221},{"style":841},[1222],{"type":33,"value":844},{"type":27,"tag":632,"props":1224,"children":1225},{"style":852},[1226],{"type":33,"value":1227},")",{"type":27,"tag":632,"props":1229,"children":1230},{"style":835},[1231],{"type":33,"value":869},{"type":27,"tag":632,"props":1233,"children":1234},{"style":841},[1235],{"type":33,"value":1236}," Promise",{"type":27,"tag":632,"props":1238,"children":1239},{"style":852},[1240],{"type":33,"value":1241},"\u003C",{"type":27,"tag":632,"props":1243,"children":1244},{"style":841},[1245],{"type":33,"value":1246},"LLMResponse",{"type":27,"tag":632,"props":1248,"children":1249},{"style":852},[1250],{"type":33,"value":1251},"> {\n",{"type":27,"tag":632,"props":1253,"children":1255},{"class":634,"line":1254},19,[1256,1261,1266,1270,1275],{"type":27,"tag":632,"props":1257,"children":1258},{"style":835},[1259],{"type":33,"value":1260},"  const",{"type":27,"tag":632,"props":1262,"children":1263},{"style":965},[1264],{"type":33,"value":1265}," provider",{"type":27,"tag":632,"props":1267,"children":1268},{"style":835},[1269],{"type":33,"value":849},{"type":27,"tag":632,"props":1271,"children":1272},{"style":841},[1273],{"type":33,"value":1274}," routeModel",{"type":27,"tag":632,"props":1276,"children":1277},{"style":852},[1278],{"type":33,"value":1279},"(input)\n",{"type":27,"tag":632,"props":1281,"children":1283},{"class":634,"line":1282},20,[1284,1288,1293,1297,1302,1307,1311,1316,1321,1326],{"type":27,"tag":632,"props":1285,"children":1286},{"style":835},[1287],{"type":33,"value":1260},{"type":27,"tag":632,"props":1289,"children":1290},{"style":965},[1291],{"type":33,"value":1292}," timeoutMs",{"type":27,"tag":632,"props":1294,"children":1295},{"style":835},[1296],{"type":33,"value":849},{"type":27,"tag":632,"props":1298,"children":1299},{"style":852},[1300],{"type":33,"value":1301}," input.task ",{"type":27,"tag":632,"props":1303,"children":1304},{"style":835},[1305],{"type":33,"value":1306},"===",{"type":27,"tag":632,"props":1308,"children":1309},{"style":872},[1310],{"type":33,"value":875},{"type":27,"tag":632,"props":1312,"children":1313},{"style":835},[1314],{"type":33,"value":1315}," ?",{"type":27,"tag":632,"props":1317,"children":1318},{"style":965},[1319],{"type":33,"value":1320}," 12_000",{"type":27,"tag":632,"props":1322,"children":1323},{"style":835},[1324],{"type":33,"value":1325}," :",{"type":27,"tag":632,"props":1327,"children":1328},{"style":965},[1329],{"type":33,"value":1330}," 20_000\n",{"type":27,"tag":632,"props":1332,"children":1334},{"class":634,"line":1333},21,[1335],{"type":27,"tag":632,"props":1336,"children":1337},{"emptyLinePlaceholder":18},[1338],{"type":33,"value":1050},{"type":27,"tag":632,"props":1340,"children":1342},{"class":634,"line":1341},22,[1343,1348,1353],{"type":27,"tag":632,"props":1344,"children":1345},{"style":835},[1346],{"type":33,"value":1347},"  return",{"type":27,"tag":632,"props":1349,"children":1350},{"style":841},[1351],{"type":33,"value":1352}," withRetry",{"type":27,"tag":632,"props":1354,"children":1355},{"style":852},[1356],{"type":33,"value":1357},"(\n",{"type":27,"tag":632,"props":1359,"children":1361},{"class":634,"line":1360},23,[1362,1367,1372,1377,1382],{"type":27,"tag":632,"props":1363,"children":1364},{"style":852},[1365],{"type":33,"value":1366},"    () ",{"type":27,"tag":632,"props":1368,"children":1369},{"style":835},[1370],{"type":33,"value":1371},"=>",{"type":27,"tag":632,"props":1373,"children":1374},{"style":852},[1375],{"type":33,"value":1376}," provider.",{"type":27,"tag":632,"props":1378,"children":1379},{"style":841},[1380],{"type":33,"value":1381},"generate",{"type":27,"tag":632,"props":1383,"children":1384},{"style":852},[1385],{"type":33,"value":1386},"(input, timeoutMs),\n",{"type":27,"tag":632,"props":1388,"children":1390},{"class":634,"line":1389},24,[1391,1396,1401,1406,1411],{"type":27,"tag":632,"props":1392,"children":1393},{"style":852},[1394],{"type":33,"value":1395},"    { retries: ",{"type":27,"tag":632,"props":1397,"children":1398},{"style":965},[1399],{"type":33,"value":1400},"2",{"type":27,"tag":632,"props":1402,"children":1403},{"style":852},[1404],{"type":33,"value":1405},", backoffMs: ",{"type":27,"tag":632,"props":1407,"children":1408},{"style":965},[1409],{"type":33,"value":1410},"500",{"type":27,"tag":632,"props":1412,"children":1413},{"style":852},[1414],{"type":33,"value":1415}," },\n",{"type":27,"tag":632,"props":1417,"children":1419},{"class":634,"line":1418},25,[1420],{"type":27,"tag":632,"props":1421,"children":1422},{"style":852},[1423],{"type":33,"value":1424},"  )\n",{"type":27,"tag":632,"props":1426,"children":1428},{"class":634,"line":1427},26,[1429],{"type":27,"tag":632,"props":1430,"children":1431},{"style":852},[1432],{"type":33,"value":1042},{"type":27,"tag":35,"props":1434,"children":1435},{},[1436],{"type":33,"value":1437},"这样业务层只需要表达“我要什么能力”，而不需要知道底层到底接的是哪个模型、哪个平台。",{"type":27,"tag":167,"props":1439,"children":1440},{},[],{"type":27,"tag":171,"props":1442,"children":1444},{"id":1443},"四prompt-不是字符串而是产品接口的一部分",[1445],{"type":33,"value":1446},"四、Prompt 不是字符串，而是产品接口的一部分",{"type":27,"tag":35,"props":1448,"children":1449},{},[1450],{"type":33,"value":1451},"很多人写 prompt 的方式很像写注释：想到什么就往里塞一点。",{"type":27,"tag":35,"props":1453,"children":1454},{},[1455],{"type":33,"value":1456},"这在 demo 阶段没问题，到产品阶段会出两个严重问题：",{"type":27,"tag":41,"props":1458,"children":1459},{},[1460,1465],{"type":27,"tag":45,"props":1461,"children":1462},{},[1463],{"type":33,"value":1464},"prompt 版本不可追踪",{"type":27,"tag":45,"props":1466,"children":1467},{},[1468],{"type":33,"value":1469},"不同团队各自魔改，结果质量不可控",{"type":27,"tag":251,"props":1471,"children":1473},{"id":1472},"_41-建议把-prompt-模板显式版本化",[1474],{"type":33,"value":1475},"4.1 建议把 prompt 模板显式版本化",{"type":27,"tag":622,"props":1477,"children":1479},{"className":823,"code":1478,"language":825,"meta":7,"style":7},"export const SUMMARY_PROMPT_V3 = `\n你是一个面向普通用户的内容总结助手。\n请遵守以下规则：\n1. 输出 3 条要点\n2. 每条不超过 40 字\n3. 不要编造原文没有的信息\n4. 最终输出 JSON：{\"bullets\": string[]}\n`\n",[1480],{"type":27,"tag":628,"props":1481,"children":1482},{"__ignoreMap":7},[1483,1509,1517,1525,1533,1541,1549,1557],{"type":27,"tag":632,"props":1484,"children":1485},{"class":634,"line":20},[1486,1490,1495,1500,1504],{"type":27,"tag":632,"props":1487,"children":1488},{"style":835},[1489],{"type":33,"value":1189},{"type":27,"tag":632,"props":1491,"children":1492},{"style":835},[1493],{"type":33,"value":1494}," const",{"type":27,"tag":632,"props":1496,"children":1497},{"style":965},[1498],{"type":33,"value":1499}," SUMMARY_PROMPT_V3",{"type":27,"tag":632,"props":1501,"children":1502},{"style":835},[1503],{"type":33,"value":849},{"type":27,"tag":632,"props":1505,"children":1506},{"style":872},[1507],{"type":33,"value":1508}," `\n",{"type":27,"tag":632,"props":1510,"children":1511},{"class":634,"line":643},[1512],{"type":27,"tag":632,"props":1513,"children":1514},{"style":872},[1515],{"type":33,"value":1516},"你是一个面向普通用户的内容总结助手。\n",{"type":27,"tag":632,"props":1518,"children":1519},{"class":634,"line":652},[1520],{"type":27,"tag":632,"props":1521,"children":1522},{"style":872},[1523],{"type":33,"value":1524},"请遵守以下规则：\n",{"type":27,"tag":632,"props":1526,"children":1527},{"class":634,"line":661},[1528],{"type":27,"tag":632,"props":1529,"children":1530},{"style":872},[1531],{"type":33,"value":1532},"1. 输出 3 条要点\n",{"type":27,"tag":632,"props":1534,"children":1535},{"class":634,"line":669},[1536],{"type":27,"tag":632,"props":1537,"children":1538},{"style":872},[1539],{"type":33,"value":1540},"2. 每条不超过 40 字\n",{"type":27,"tag":632,"props":1542,"children":1543},{"class":634,"line":678},[1544],{"type":27,"tag":632,"props":1545,"children":1546},{"style":872},[1547],{"type":33,"value":1548},"3. 不要编造原文没有的信息\n",{"type":27,"tag":632,"props":1550,"children":1551},{"class":634,"line":686},[1552],{"type":27,"tag":632,"props":1553,"children":1554},{"style":872},[1555],{"type":33,"value":1556},"4. 最终输出 JSON：{\"bullets\": string[]}\n",{"type":27,"tag":632,"props":1558,"children":1559},{"class":634,"line":695},[1560],{"type":27,"tag":632,"props":1561,"children":1562},{"style":872},[1563],{"type":33,"value":1564},"`\n",{"type":27,"tag":251,"props":1566,"children":1568},{"id":1567},"_42-为什么-prompt-要版本化",[1569],{"type":33,"value":1570},"4.2 为什么 prompt 要版本化",{"type":27,"tag":35,"props":1572,"children":1573},{},[1574],{"type":33,"value":1575},"因为它已经不是“临时文案”，而是决定结果质量的接口契约。",{"type":27,"tag":35,"props":1577,"children":1578},{},[1579],{"type":33,"value":1580},"当你要评估：",{"type":27,"tag":76,"props":1582,"children":1583},{},[1584,1589,1594],{"type":27,"tag":45,"props":1585,"children":1586},{},[1587],{"type":33,"value":1588},"为什么今天效果变差了",{"type":27,"tag":45,"props":1590,"children":1591},{},[1592],{"type":33,"value":1593},"为什么某个 A/B 桶表现不同",{"type":27,"tag":45,"props":1595,"children":1596},{},[1597],{"type":33,"value":1598},"为什么某些用户投诉答案跑偏",{"type":27,"tag":35,"props":1600,"children":1601},{},[1602],{"type":33,"value":1603},"没有 prompt 版本和调用记录，你很难定位。",{"type":27,"tag":251,"props":1605,"children":1607},{"id":1606},"_43-prompt-设计的一个务实原则",[1608],{"type":33,"value":1609},"4.3 Prompt 设计的一个务实原则",{"type":27,"tag":35,"props":1611,"children":1612},{},[1613],{"type":33,"value":1614},"不要让一个 prompt 同时承担太多目标。",{"type":27,"tag":35,"props":1616,"children":1617},{},[1618],{"type":33,"value":1619},"例如：",{"type":27,"tag":76,"props":1621,"children":1622},{},[1623,1628,1633,1638],{"type":27,"tag":45,"props":1624,"children":1625},{},[1626],{"type":33,"value":1627},"既要总结",{"type":27,"tag":45,"props":1629,"children":1630},{},[1631],{"type":33,"value":1632},"又要风格化",{"type":27,"tag":45,"props":1634,"children":1635},{},[1636],{"type":33,"value":1637},"还要抽取结构化字段",{"type":27,"tag":45,"props":1639,"children":1640},{},[1641],{"type":33,"value":1642},"顺便再生成标题和摘要",{"type":27,"tag":35,"props":1644,"children":1645},{},[1646,1648,1653],{"type":33,"value":1647},"任务越杂，稳定性越差。工程上更稳的方式是",{"type":27,"tag":212,"props":1649,"children":1650},{},[1651],{"type":33,"value":1652},"拆任务",{"type":33,"value":1654},"。",{"type":27,"tag":167,"props":1656,"children":1657},{},[],{"type":27,"tag":171,"props":1659,"children":1661},{"id":1660},"五结构化输出比写一段漂亮文字更值钱",[1662],{"type":33,"value":1663},"五、结构化输出：比“写一段漂亮文字”更值钱",{"type":27,"tag":35,"props":1665,"children":1666},{},[1667],{"type":33,"value":1668},"对真实产品来说，很多最有商业价值的场景不是聊天，而是：",{"type":27,"tag":76,"props":1670,"children":1671},{},[1672,1677,1682,1687],{"type":27,"tag":45,"props":1673,"children":1674},{},[1675],{"type":33,"value":1676},"从工单里抽意图",{"type":27,"tag":45,"props":1678,"children":1679},{},[1680],{"type":33,"value":1681},"从简历里抽结构化字段",{"type":27,"tag":45,"props":1683,"children":1684},{},[1685],{"type":33,"value":1686},"从用户问题里抽分类和优先级",{"type":27,"tag":45,"props":1688,"children":1689},{},[1690],{"type":33,"value":1691},"从长文里抽标题、摘要、标签",{"type":27,"tag":35,"props":1693,"children":1694},{},[1695],{"type":33,"value":1696},"这类场景最重要的不是“文字优美”，而是“结果结构稳定”。",{"type":27,"tag":251,"props":1698,"children":1700},{"id":1699},"_51-一个-json-输出示例",[1701],{"type":33,"value":1702},"5.1 一个 JSON 输出示例",{"type":27,"tag":622,"props":1704,"children":1706},{"className":823,"code":1705,"language":825,"meta":7,"style":7},"const request = {\n  task: 'extract',\n  responseFormat: 'json',\n  messages: [\n    {\n      role: 'system',\n      content: '你是信息抽取助手，只输出 JSON。',\n    },\n    {\n      role: 'user',\n      content: '帮我从这段投诉文本中提取问题类型、紧急程度和是否涉及退款。',\n    },\n  ],\n}\n",[1707],{"type":27,"tag":628,"props":1708,"children":1709},{"__ignoreMap":7},[1710,1731,1749,1766,1774,1782,1799,1816,1824,1831,1847,1863,1870,1878],{"type":27,"tag":632,"props":1711,"children":1712},{"class":634,"line":20},[1713,1718,1723,1727],{"type":27,"tag":632,"props":1714,"children":1715},{"style":835},[1716],{"type":33,"value":1717},"const",{"type":27,"tag":632,"props":1719,"children":1720},{"style":965},[1721],{"type":33,"value":1722}," request",{"type":27,"tag":632,"props":1724,"children":1725},{"style":835},[1726],{"type":33,"value":849},{"type":27,"tag":632,"props":1728,"children":1729},{"style":852},[1730],{"type":33,"value":855},{"type":27,"tag":632,"props":1732,"children":1733},{"class":634,"line":643},[1734,1739,1744],{"type":27,"tag":632,"props":1735,"children":1736},{"style":852},[1737],{"type":33,"value":1738},"  task: ",{"type":27,"tag":632,"props":1740,"children":1741},{"style":872},[1742],{"type":33,"value":1743},"'extract'",{"type":27,"tag":632,"props":1745,"children":1746},{"style":852},[1747],{"type":33,"value":1748},",\n",{"type":27,"tag":632,"props":1750,"children":1751},{"class":634,"line":652},[1752,1757,1762],{"type":27,"tag":632,"props":1753,"children":1754},{"style":852},[1755],{"type":33,"value":1756},"  responseFormat: ",{"type":27,"tag":632,"props":1758,"children":1759},{"style":872},[1760],{"type":33,"value":1761},"'json'",{"type":27,"tag":632,"props":1763,"children":1764},{"style":852},[1765],{"type":33,"value":1748},{"type":27,"tag":632,"props":1767,"children":1768},{"class":634,"line":661},[1769],{"type":27,"tag":632,"props":1770,"children":1771},{"style":852},[1772],{"type":33,"value":1773},"  messages: [\n",{"type":27,"tag":632,"props":1775,"children":1776},{"class":634,"line":669},[1777],{"type":27,"tag":632,"props":1778,"children":1779},{"style":852},[1780],{"type":33,"value":1781},"    {\n",{"type":27,"tag":632,"props":1783,"children":1784},{"class":634,"line":678},[1785,1790,1795],{"type":27,"tag":632,"props":1786,"children":1787},{"style":852},[1788],{"type":33,"value":1789},"      role: ",{"type":27,"tag":632,"props":1791,"children":1792},{"style":872},[1793],{"type":33,"value":1794},"'system'",{"type":27,"tag":632,"props":1796,"children":1797},{"style":852},[1798],{"type":33,"value":1748},{"type":27,"tag":632,"props":1800,"children":1801},{"class":634,"line":686},[1802,1807,1812],{"type":27,"tag":632,"props":1803,"children":1804},{"style":852},[1805],{"type":33,"value":1806},"      content: ",{"type":27,"tag":632,"props":1808,"children":1809},{"style":872},[1810],{"type":33,"value":1811},"'你是信息抽取助手，只输出 JSON。'",{"type":27,"tag":632,"props":1813,"children":1814},{"style":852},[1815],{"type":33,"value":1748},{"type":27,"tag":632,"props":1817,"children":1818},{"class":634,"line":695},[1819],{"type":27,"tag":632,"props":1820,"children":1821},{"style":852},[1822],{"type":33,"value":1823},"    },\n",{"type":27,"tag":632,"props":1825,"children":1826},{"class":634,"line":703},[1827],{"type":27,"tag":632,"props":1828,"children":1829},{"style":852},[1830],{"type":33,"value":1781},{"type":27,"tag":632,"props":1832,"children":1833},{"class":634,"line":1073},[1834,1838,1843],{"type":27,"tag":632,"props":1835,"children":1836},{"style":852},[1837],{"type":33,"value":1789},{"type":27,"tag":632,"props":1839,"children":1840},{"style":872},[1841],{"type":33,"value":1842},"'user'",{"type":27,"tag":632,"props":1844,"children":1845},{"style":852},[1846],{"type":33,"value":1748},{"type":27,"tag":632,"props":1848,"children":1849},{"class":634,"line":1091},[1850,1854,1859],{"type":27,"tag":632,"props":1851,"children":1852},{"style":852},[1853],{"type":33,"value":1806},{"type":27,"tag":632,"props":1855,"children":1856},{"style":872},[1857],{"type":33,"value":1858},"'帮我从这段投诉文本中提取问题类型、紧急程度和是否涉及退款。'",{"type":27,"tag":632,"props":1860,"children":1861},{"style":852},[1862],{"type":33,"value":1748},{"type":27,"tag":632,"props":1864,"children":1865},{"class":634,"line":1108},[1866],{"type":27,"tag":632,"props":1867,"children":1868},{"style":852},[1869],{"type":33,"value":1823},{"type":27,"tag":632,"props":1871,"children":1872},{"class":634,"line":1125},[1873],{"type":27,"tag":632,"props":1874,"children":1875},{"style":852},[1876],{"type":33,"value":1877},"  ],\n",{"type":27,"tag":632,"props":1879,"children":1880},{"class":634,"line":1142},[1881],{"type":27,"tag":632,"props":1882,"children":1883},{"style":852},[1884],{"type":33,"value":1042},{"type":27,"tag":251,"props":1886,"children":1888},{"id":1887},"_52-工程上还要做-schema-校验",[1889],{"type":33,"value":1890},"5.2 工程上还要做 schema 校验",{"type":27,"tag":35,"props":1892,"children":1893},{},[1894],{"type":33,"value":1895},"不要因为模型说“我会输出 JSON”，就相信它一定稳定。",{"type":27,"tag":35,"props":1897,"children":1898},{},[1899],{"type":33,"value":1900},"更稳的做法是：",{"type":27,"tag":622,"props":1902,"children":1904},{"className":823,"code":1903,"language":825,"meta":7,"style":7},"import { z } from 'zod'\n\nconst TicketSchema = z.object({\n  issueType: z.string(),\n  priority: z.enum(['low', 'medium', 'high']),\n  refundRelated: z.boolean(),\n})\n\nexport function parseTicketResult(raw: string) {\n  const parsed = JSON.parse(raw)\n  return TicketSchema.parse(parsed)\n}\n",[1905],{"type":27,"tag":628,"props":1906,"children":1907},{"__ignoreMap":7},[1908,1931,1938,1969,1987,2034,2051,2059,2066,2104,2140,2161],{"type":27,"tag":632,"props":1909,"children":1910},{"class":634,"line":20},[1911,1916,1921,1926],{"type":27,"tag":632,"props":1912,"children":1913},{"style":835},[1914],{"type":33,"value":1915},"import",{"type":27,"tag":632,"props":1917,"children":1918},{"style":852},[1919],{"type":33,"value":1920}," { z } ",{"type":27,"tag":632,"props":1922,"children":1923},{"style":835},[1924],{"type":33,"value":1925},"from",{"type":27,"tag":632,"props":1927,"children":1928},{"style":872},[1929],{"type":33,"value":1930}," 'zod'\n",{"type":27,"tag":632,"props":1932,"children":1933},{"class":634,"line":643},[1934],{"type":27,"tag":632,"props":1935,"children":1936},{"emptyLinePlaceholder":18},[1937],{"type":33,"value":1050},{"type":27,"tag":632,"props":1939,"children":1940},{"class":634,"line":652},[1941,1945,1950,1954,1959,1964],{"type":27,"tag":632,"props":1942,"children":1943},{"style":835},[1944],{"type":33,"value":1717},{"type":27,"tag":632,"props":1946,"children":1947},{"style":965},[1948],{"type":33,"value":1949}," TicketSchema",{"type":27,"tag":632,"props":1951,"children":1952},{"style":835},[1953],{"type":33,"value":849},{"type":27,"tag":632,"props":1955,"children":1956},{"style":852},[1957],{"type":33,"value":1958}," z.",{"type":27,"tag":632,"props":1960,"children":1961},{"style":841},[1962],{"type":33,"value":1963},"object",{"type":27,"tag":632,"props":1965,"children":1966},{"style":852},[1967],{"type":33,"value":1968},"({\n",{"type":27,"tag":632,"props":1970,"children":1971},{"class":634,"line":661},[1972,1977,1982],{"type":27,"tag":632,"props":1973,"children":1974},{"style":852},[1975],{"type":33,"value":1976},"  issueType: z.",{"type":27,"tag":632,"props":1978,"children":1979},{"style":841},[1980],{"type":33,"value":1981},"string",{"type":27,"tag":632,"props":1983,"children":1984},{"style":852},[1985],{"type":33,"value":1986},"(),\n",{"type":27,"tag":632,"props":1988,"children":1989},{"class":634,"line":669},[1990,1995,2000,2005,2010,2015,2020,2024,2029],{"type":27,"tag":632,"props":1991,"children":1992},{"style":852},[1993],{"type":33,"value":1994},"  priority: z.",{"type":27,"tag":632,"props":1996,"children":1997},{"style":841},[1998],{"type":33,"value":1999},"enum",{"type":27,"tag":632,"props":2001,"children":2002},{"style":852},[2003],{"type":33,"value":2004},"([",{"type":27,"tag":632,"props":2006,"children":2007},{"style":872},[2008],{"type":33,"value":2009},"'low'",{"type":27,"tag":632,"props":2011,"children":2012},{"style":852},[2013],{"type":33,"value":2014},", ",{"type":27,"tag":632,"props":2016,"children":2017},{"style":872},[2018],{"type":33,"value":2019},"'medium'",{"type":27,"tag":632,"props":2021,"children":2022},{"style":852},[2023],{"type":33,"value":2014},{"type":27,"tag":632,"props":2025,"children":2026},{"style":872},[2027],{"type":33,"value":2028},"'high'",{"type":27,"tag":632,"props":2030,"children":2031},{"style":852},[2032],{"type":33,"value":2033},"]),\n",{"type":27,"tag":632,"props":2035,"children":2036},{"class":634,"line":678},[2037,2042,2047],{"type":27,"tag":632,"props":2038,"children":2039},{"style":852},[2040],{"type":33,"value":2041},"  refundRelated: z.",{"type":27,"tag":632,"props":2043,"children":2044},{"style":841},[2045],{"type":33,"value":2046},"boolean",{"type":27,"tag":632,"props":2048,"children":2049},{"style":852},[2050],{"type":33,"value":1986},{"type":27,"tag":632,"props":2052,"children":2053},{"class":634,"line":686},[2054],{"type":27,"tag":632,"props":2055,"children":2056},{"style":852},[2057],{"type":33,"value":2058},"})\n",{"type":27,"tag":632,"props":2060,"children":2061},{"class":634,"line":695},[2062],{"type":27,"tag":632,"props":2063,"children":2064},{"emptyLinePlaceholder":18},[2065],{"type":33,"value":1050},{"type":27,"tag":632,"props":2067,"children":2068},{"class":634,"line":703},[2069,2073,2077,2082,2086,2091,2095,2099],{"type":27,"tag":632,"props":2070,"children":2071},{"style":835},[2072],{"type":33,"value":1189},{"type":27,"tag":632,"props":2074,"children":2075},{"style":835},[2076],{"type":33,"value":1199},{"type":27,"tag":632,"props":2078,"children":2079},{"style":841},[2080],{"type":33,"value":2081}," parseTicketResult",{"type":27,"tag":632,"props":2083,"children":2084},{"style":852},[2085],{"type":33,"value":1209},{"type":27,"tag":632,"props":2087,"children":2088},{"style":861},[2089],{"type":33,"value":2090},"raw",{"type":27,"tag":632,"props":2092,"children":2093},{"style":835},[2094],{"type":33,"value":869},{"type":27,"tag":632,"props":2096,"children":2097},{"style":965},[2098],{"type":33,"value":968},{"type":27,"tag":632,"props":2100,"children":2101},{"style":852},[2102],{"type":33,"value":2103},") {\n",{"type":27,"tag":632,"props":2105,"children":2106},{"class":634,"line":1073},[2107,2111,2116,2120,2125,2130,2135],{"type":27,"tag":632,"props":2108,"children":2109},{"style":835},[2110],{"type":33,"value":1260},{"type":27,"tag":632,"props":2112,"children":2113},{"style":965},[2114],{"type":33,"value":2115}," parsed",{"type":27,"tag":632,"props":2117,"children":2118},{"style":835},[2119],{"type":33,"value":849},{"type":27,"tag":632,"props":2121,"children":2122},{"style":965},[2123],{"type":33,"value":2124}," JSON",{"type":27,"tag":632,"props":2126,"children":2127},{"style":852},[2128],{"type":33,"value":2129},".",{"type":27,"tag":632,"props":2131,"children":2132},{"style":841},[2133],{"type":33,"value":2134},"parse",{"type":27,"tag":632,"props":2136,"children":2137},{"style":852},[2138],{"type":33,"value":2139},"(raw)\n",{"type":27,"tag":632,"props":2141,"children":2142},{"class":634,"line":1091},[2143,2147,2152,2156],{"type":27,"tag":632,"props":2144,"children":2145},{"style":835},[2146],{"type":33,"value":1347},{"type":27,"tag":632,"props":2148,"children":2149},{"style":852},[2150],{"type":33,"value":2151}," TicketSchema.",{"type":27,"tag":632,"props":2153,"children":2154},{"style":841},[2155],{"type":33,"value":2134},{"type":27,"tag":632,"props":2157,"children":2158},{"style":852},[2159],{"type":33,"value":2160},"(parsed)\n",{"type":27,"tag":632,"props":2162,"children":2163},{"class":634,"line":1108},[2164],{"type":27,"tag":632,"props":2165,"children":2166},{"style":852},[2167],{"type":33,"value":1042},{"type":27,"tag":35,"props":2169,"children":2170},{},[2171],{"type":33,"value":2172},"这样做的价值非常大：",{"type":27,"tag":76,"props":2174,"children":2175},{},[2176,2181,2186],{"type":27,"tag":45,"props":2177,"children":2178},{},[2179],{"type":33,"value":2180},"能快速识别异常输出",{"type":27,"tag":45,"props":2182,"children":2183},{},[2184],{"type":33,"value":2185},"能做 fallback",{"type":27,"tag":45,"props":2187,"children":2188},{},[2189],{"type":33,"value":2190},"能做统计，知道结构正确率到底是多少",{"type":27,"tag":167,"props":2192,"children":2193},{},[],{"type":27,"tag":171,"props":2195,"children":2197},{"id":2196},"六流式响应体验很香但链路要先稳",[2198],{"type":33,"value":2199},"六、流式响应：体验很香，但链路要先稳",{"type":27,"tag":35,"props":2201,"children":2202},{},[2203],{"type":33,"value":2204},"流式输出是很多 AI 产品体验的关键。用户看到文本一边生成，会明显感觉“系统在工作”。",{"type":27,"tag":251,"props":2206,"children":2208},{"id":2207},"_61-流式为什么值钱",[2209],{"type":33,"value":2210},"6.1 流式为什么值钱",{"type":27,"tag":35,"props":2212,"children":2213},{},[2214,2216,2221],{"type":33,"value":2215},"因为它改善的不是总耗时，而是",{"type":27,"tag":212,"props":2217,"children":2218},{},[2219],{"type":33,"value":2220},"感知等待时间",{"type":33,"value":1654},{"type":27,"tag":35,"props":2223,"children":2224},{},[2225],{"type":33,"value":2226},"一个 8 秒总响应的请求：",{"type":27,"tag":76,"props":2228,"children":2229},{},[2230,2235],{"type":27,"tag":45,"props":2231,"children":2232},{},[2233],{"type":33,"value":2234},"如果 8 秒后一次性返回，体验很差",{"type":27,"tag":45,"props":2236,"children":2237},{},[2238],{"type":33,"value":2239},"如果 800ms 后开始持续出字，用户容忍度会高很多",{"type":27,"tag":251,"props":2241,"children":2243},{"id":2242},"_62-一个-sse-示例",[2244],{"type":33,"value":2245},"6.2 一个 SSE 示例",{"type":27,"tag":622,"props":2247,"children":2249},{"className":823,"code":2248,"language":825,"meta":7,"style":7},"export async function streamChat(req, res) {\n  res.setHeader('Content-Type', 'text/event-stream')\n  res.setHeader('Cache-Control', 'no-cache')\n  res.setHeader('Connection', 'keep-alive')\n\n  for await (const chunk of llmProvider.stream(req.body.messages)) {\n    res.write(`data: ${JSON.stringify({ delta: chunk })}\\n\\n`)\n  }\n\n  res.write('event: done\\ndata: {}\\n\\n')\n  res.end()\n}\n",[2250],{"type":27,"tag":628,"props":2251,"children":2252},{"__ignoreMap":7},[2253,2295,2331,2364,2397,2404,2451,2521,2528,2535,2578,2595],{"type":27,"tag":632,"props":2254,"children":2255},{"class":634,"line":20},[2256,2260,2264,2268,2273,2277,2282,2286,2291],{"type":27,"tag":632,"props":2257,"children":2258},{"style":835},[2259],{"type":33,"value":1189},{"type":27,"tag":632,"props":2261,"children":2262},{"style":835},[2263],{"type":33,"value":1194},{"type":27,"tag":632,"props":2265,"children":2266},{"style":835},[2267],{"type":33,"value":1199},{"type":27,"tag":632,"props":2269,"children":2270},{"style":841},[2271],{"type":33,"value":2272}," streamChat",{"type":27,"tag":632,"props":2274,"children":2275},{"style":852},[2276],{"type":33,"value":1209},{"type":27,"tag":632,"props":2278,"children":2279},{"style":861},[2280],{"type":33,"value":2281},"req",{"type":27,"tag":632,"props":2283,"children":2284},{"style":852},[2285],{"type":33,"value":2014},{"type":27,"tag":632,"props":2287,"children":2288},{"style":861},[2289],{"type":33,"value":2290},"res",{"type":27,"tag":632,"props":2292,"children":2293},{"style":852},[2294],{"type":33,"value":2103},{"type":27,"tag":632,"props":2296,"children":2297},{"class":634,"line":643},[2298,2303,2308,2312,2317,2321,2326],{"type":27,"tag":632,"props":2299,"children":2300},{"style":852},[2301],{"type":33,"value":2302},"  res.",{"type":27,"tag":632,"props":2304,"children":2305},{"style":841},[2306],{"type":33,"value":2307},"setHeader",{"type":27,"tag":632,"props":2309,"children":2310},{"style":852},[2311],{"type":33,"value":1209},{"type":27,"tag":632,"props":2313,"children":2314},{"style":872},[2315],{"type":33,"value":2316},"'Content-Type'",{"type":27,"tag":632,"props":2318,"children":2319},{"style":852},[2320],{"type":33,"value":2014},{"type":27,"tag":632,"props":2322,"children":2323},{"style":872},[2324],{"type":33,"value":2325},"'text/event-stream'",{"type":27,"tag":632,"props":2327,"children":2328},{"style":852},[2329],{"type":33,"value":2330},")\n",{"type":27,"tag":632,"props":2332,"children":2333},{"class":634,"line":652},[2334,2338,2342,2346,2351,2355,2360],{"type":27,"tag":632,"props":2335,"children":2336},{"style":852},[2337],{"type":33,"value":2302},{"type":27,"tag":632,"props":2339,"children":2340},{"style":841},[2341],{"type":33,"value":2307},{"type":27,"tag":632,"props":2343,"children":2344},{"style":852},[2345],{"type":33,"value":1209},{"type":27,"tag":632,"props":2347,"children":2348},{"style":872},[2349],{"type":33,"value":2350},"'Cache-Control'",{"type":27,"tag":632,"props":2352,"children":2353},{"style":852},[2354],{"type":33,"value":2014},{"type":27,"tag":632,"props":2356,"children":2357},{"style":872},[2358],{"type":33,"value":2359},"'no-cache'",{"type":27,"tag":632,"props":2361,"children":2362},{"style":852},[2363],{"type":33,"value":2330},{"type":27,"tag":632,"props":2365,"children":2366},{"class":634,"line":661},[2367,2371,2375,2379,2384,2388,2393],{"type":27,"tag":632,"props":2368,"children":2369},{"style":852},[2370],{"type":33,"value":2302},{"type":27,"tag":632,"props":2372,"children":2373},{"style":841},[2374],{"type":33,"value":2307},{"type":27,"tag":632,"props":2376,"children":2377},{"style":852},[2378],{"type":33,"value":1209},{"type":27,"tag":632,"props":2380,"children":2381},{"style":872},[2382],{"type":33,"value":2383},"'Connection'",{"type":27,"tag":632,"props":2385,"children":2386},{"style":852},[2387],{"type":33,"value":2014},{"type":27,"tag":632,"props":2389,"children":2390},{"style":872},[2391],{"type":33,"value":2392},"'keep-alive'",{"type":27,"tag":632,"props":2394,"children":2395},{"style":852},[2396],{"type":33,"value":2330},{"type":27,"tag":632,"props":2398,"children":2399},{"class":634,"line":669},[2400],{"type":27,"tag":632,"props":2401,"children":2402},{"emptyLinePlaceholder":18},[2403],{"type":33,"value":1050},{"type":27,"tag":632,"props":2405,"children":2406},{"class":634,"line":678},[2407,2412,2417,2422,2426,2431,2436,2441,2446],{"type":27,"tag":632,"props":2408,"children":2409},{"style":835},[2410],{"type":33,"value":2411},"  for",{"type":27,"tag":632,"props":2413,"children":2414},{"style":835},[2415],{"type":33,"value":2416}," await",{"type":27,"tag":632,"props":2418,"children":2419},{"style":852},[2420],{"type":33,"value":2421}," (",{"type":27,"tag":632,"props":2423,"children":2424},{"style":835},[2425],{"type":33,"value":1717},{"type":27,"tag":632,"props":2427,"children":2428},{"style":965},[2429],{"type":33,"value":2430}," chunk",{"type":27,"tag":632,"props":2432,"children":2433},{"style":835},[2434],{"type":33,"value":2435}," of",{"type":27,"tag":632,"props":2437,"children":2438},{"style":852},[2439],{"type":33,"value":2440}," llmProvider.",{"type":27,"tag":632,"props":2442,"children":2443},{"style":841},[2444],{"type":33,"value":2445},"stream",{"type":27,"tag":632,"props":2447,"children":2448},{"style":852},[2449],{"type":33,"value":2450},"(req.body.messages)) {\n",{"type":27,"tag":632,"props":2452,"children":2453},{"class":634,"line":686},[2454,2459,2464,2468,2473,2478,2482,2487,2492,2497,2502,2507,2512,2517],{"type":27,"tag":632,"props":2455,"children":2456},{"style":852},[2457],{"type":33,"value":2458},"    res.",{"type":27,"tag":632,"props":2460,"children":2461},{"style":841},[2462],{"type":33,"value":2463},"write",{"type":27,"tag":632,"props":2465,"children":2466},{"style":852},[2467],{"type":33,"value":1209},{"type":27,"tag":632,"props":2469,"children":2470},{"style":872},[2471],{"type":33,"value":2472},"`data: ${",{"type":27,"tag":632,"props":2474,"children":2475},{"style":965},[2476],{"type":33,"value":2477},"JSON",{"type":27,"tag":632,"props":2479,"children":2480},{"style":872},[2481],{"type":33,"value":2129},{"type":27,"tag":632,"props":2483,"children":2484},{"style":841},[2485],{"type":33,"value":2486},"stringify",{"type":27,"tag":632,"props":2488,"children":2489},{"style":872},[2490],{"type":33,"value":2491},"({ delta: ",{"type":27,"tag":632,"props":2493,"children":2494},{"style":852},[2495],{"type":33,"value":2496},"chunk",{"type":27,"tag":632,"props":2498,"children":2499},{"style":872},[2500],{"type":33,"value":2501}," })",{"type":27,"tag":632,"props":2503,"children":2504},{"style":872},[2505],{"type":33,"value":2506},"}",{"type":27,"tag":632,"props":2508,"children":2509},{"style":965},[2510],{"type":33,"value":2511},"\\n\\n",{"type":27,"tag":632,"props":2513,"children":2514},{"style":872},[2515],{"type":33,"value":2516},"`",{"type":27,"tag":632,"props":2518,"children":2519},{"style":852},[2520],{"type":33,"value":2330},{"type":27,"tag":632,"props":2522,"children":2523},{"class":634,"line":695},[2524],{"type":27,"tag":632,"props":2525,"children":2526},{"style":852},[2527],{"type":33,"value":1165},{"type":27,"tag":632,"props":2529,"children":2530},{"class":634,"line":703},[2531],{"type":27,"tag":632,"props":2532,"children":2533},{"emptyLinePlaceholder":18},[2534],{"type":33,"value":1050},{"type":27,"tag":632,"props":2536,"children":2537},{"class":634,"line":1073},[2538,2542,2546,2550,2555,2560,2565,2569,2574],{"type":27,"tag":632,"props":2539,"children":2540},{"style":852},[2541],{"type":33,"value":2302},{"type":27,"tag":632,"props":2543,"children":2544},{"style":841},[2545],{"type":33,"value":2463},{"type":27,"tag":632,"props":2547,"children":2548},{"style":852},[2549],{"type":33,"value":1209},{"type":27,"tag":632,"props":2551,"children":2552},{"style":872},[2553],{"type":33,"value":2554},"'event: done",{"type":27,"tag":632,"props":2556,"children":2557},{"style":965},[2558],{"type":33,"value":2559},"\\n",{"type":27,"tag":632,"props":2561,"children":2562},{"style":872},[2563],{"type":33,"value":2564},"data: {}",{"type":27,"tag":632,"props":2566,"children":2567},{"style":965},[2568],{"type":33,"value":2511},{"type":27,"tag":632,"props":2570,"children":2571},{"style":872},[2572],{"type":33,"value":2573},"'",{"type":27,"tag":632,"props":2575,"children":2576},{"style":852},[2577],{"type":33,"value":2330},{"type":27,"tag":632,"props":2579,"children":2580},{"class":634,"line":1091},[2581,2585,2590],{"type":27,"tag":632,"props":2582,"children":2583},{"style":852},[2584],{"type":33,"value":2302},{"type":27,"tag":632,"props":2586,"children":2587},{"style":841},[2588],{"type":33,"value":2589},"end",{"type":27,"tag":632,"props":2591,"children":2592},{"style":852},[2593],{"type":33,"value":2594},"()\n",{"type":27,"tag":632,"props":2596,"children":2597},{"class":634,"line":1108},[2598],{"type":27,"tag":632,"props":2599,"children":2600},{"style":852},[2601],{"type":33,"value":1042},{"type":27,"tag":251,"props":2603,"children":2605},{"id":2604},"_63-流式集成的三个真实坑",[2606],{"type":33,"value":2607},"6.3 流式集成的三个真实坑",{"type":27,"tag":41,"props":2609,"children":2610},{},[2611,2616,2621],{"type":27,"tag":45,"props":2612,"children":2613},{},[2614],{"type":33,"value":2615},"中途断流后前端如何恢复状态",{"type":27,"tag":45,"props":2617,"children":2618},{},[2619],{"type":33,"value":2620},"网关、代理、CDN 是否支持长连接流式透传",{"type":27,"tag":45,"props":2622,"children":2623},{},[2624],{"type":33,"value":2625},"如果模型侧超时，用户看到的是“停住了”还是明确失败",{"type":27,"tag":35,"props":2627,"children":2628},{},[2629],{"type":33,"value":2630},"所以流式响应不只是前端效果，而是全链路兼容性问题。",{"type":27,"tag":167,"props":2632,"children":2633},{},[],{"type":27,"tag":171,"props":2635,"children":2637},{"id":2636},"七超时重试fallbackai-不稳定是常态不是例外",[2638],{"type":33,"value":2639},"七、超时、重试、fallback：AI 不稳定是常态，不是例外",{"type":27,"tag":35,"props":2641,"children":2642},{},[2643],{"type":33,"value":2644},"这点必须讲清楚。",{"type":27,"tag":35,"props":2646,"children":2647},{},[2648],{"type":33,"value":2649},"LLM API 和传统内部 RPC 最大不同之一，就是：",{"type":27,"tag":76,"props":2651,"children":2652},{},[2653,2658,2663,2668],{"type":27,"tag":45,"props":2654,"children":2655},{},[2656],{"type":33,"value":2657},"结果不完全确定",{"type":27,"tag":45,"props":2659,"children":2660},{},[2661],{"type":33,"value":2662},"延迟波动更大",{"type":27,"tag":45,"props":2664,"children":2665},{},[2666],{"type":33,"value":2667},"provider 配额和限流更明显",{"type":27,"tag":45,"props":2669,"children":2670},{},[2671],{"type":33,"value":2672},"某些失败并不是代码 bug，而是平台抖动",{"type":27,"tag":251,"props":2674,"children":2676},{"id":2675},"_71-重试不能无脑做",[2677],{"type":33,"value":2678},"7.1 重试不能无脑做",{"type":27,"tag":35,"props":2680,"children":2681},{},[2682],{"type":33,"value":2683},"如果一个请求：",{"type":27,"tag":76,"props":2685,"children":2686},{},[2687,2692,2697],{"type":27,"tag":45,"props":2688,"children":2689},{},[2690],{"type":33,"value":2691},"已经很贵",{"type":27,"tag":45,"props":2693,"children":2694},{},[2695],{"type":33,"value":2696},"已经很慢",{"type":27,"tag":45,"props":2698,"children":2699},{},[2700],{"type":33,"value":2701},"失败原因是配额或参数问题",{"type":27,"tag":35,"props":2703,"children":2704},{},[2705],{"type":33,"value":2706},"无脑重试只会更贵、更慢。",{"type":27,"tag":251,"props":2708,"children":2710},{"id":2709},"_72-更合理的策略",[2711],{"type":33,"value":2712},"7.2 更合理的策略",{"type":27,"tag":315,"props":2714,"children":2715},{},[2716,2731],{"type":27,"tag":319,"props":2717,"children":2718},{},[2719],{"type":27,"tag":323,"props":2720,"children":2721},{},[2722,2726],{"type":27,"tag":327,"props":2723,"children":2724},{},[2725],{"type":33,"value":331},{"type":27,"tag":327,"props":2727,"children":2728},{},[2729],{"type":33,"value":2730},"策略",{"type":27,"tag":348,"props":2732,"children":2733},{},[2734,2747,2760,2773],{"type":27,"tag":323,"props":2735,"children":2736},{},[2737,2742],{"type":27,"tag":355,"props":2738,"children":2739},{},[2740],{"type":33,"value":2741},"网络抖动 / 短暂 5xx",{"type":27,"tag":355,"props":2743,"children":2744},{},[2745],{"type":33,"value":2746},"可有限重试",{"type":27,"tag":323,"props":2748,"children":2749},{},[2750,2755],{"type":27,"tag":355,"props":2751,"children":2752},{},[2753],{"type":33,"value":2754},"参数错误 / schema 错误",{"type":27,"tag":355,"props":2756,"children":2757},{},[2758],{"type":33,"value":2759},"不重试，直接修输入",{"type":27,"tag":323,"props":2761,"children":2762},{},[2763,2768],{"type":27,"tag":355,"props":2764,"children":2765},{},[2766],{"type":33,"value":2767},"超时",{"type":27,"tag":355,"props":2769,"children":2770},{},[2771],{"type":33,"value":2772},"看业务场景，必要时降级",{"type":27,"tag":323,"props":2774,"children":2775},{},[2776,2781],{"type":27,"tag":355,"props":2777,"children":2778},{},[2779],{"type":33,"value":2780},"配额限制",{"type":27,"tag":355,"props":2782,"children":2783},{},[2784],{"type":33,"value":2785},"排队、限流或切换备用模型",{"type":27,"tag":251,"props":2787,"children":2789},{"id":2788},"_73-一个-fallback-示例",[2790],{"type":33,"value":2791},"7.3 一个 fallback 示例",{"type":27,"tag":622,"props":2793,"children":2795},{"className":823,"code":2794,"language":825,"meta":7,"style":7},"export async function generateSummary(text: string) {\n  try {\n    return await callLLM({\n      task: 'summary',\n      messages: [{ role: 'user', content: text }],\n    })\n  } catch {\n    return await callBackupModel({\n      task: 'summary',\n      messages: [{ role: 'user', content: text }],\n    })\n  }\n}\n",[2796],{"type":27,"tag":628,"props":2797,"children":2798},{"__ignoreMap":7},[2799,2839,2851,2871,2888,2905,2913,2930,2950,2965,2980,2987,2994],{"type":27,"tag":632,"props":2800,"children":2801},{"class":634,"line":20},[2802,2806,2810,2814,2819,2823,2827,2831,2835],{"type":27,"tag":632,"props":2803,"children":2804},{"style":835},[2805],{"type":33,"value":1189},{"type":27,"tag":632,"props":2807,"children":2808},{"style":835},[2809],{"type":33,"value":1194},{"type":27,"tag":632,"props":2811,"children":2812},{"style":835},[2813],{"type":33,"value":1199},{"type":27,"tag":632,"props":2815,"children":2816},{"style":841},[2817],{"type":33,"value":2818}," generateSummary",{"type":27,"tag":632,"props":2820,"children":2821},{"style":852},[2822],{"type":33,"value":1209},{"type":27,"tag":632,"props":2824,"children":2825},{"style":861},[2826],{"type":33,"value":33},{"type":27,"tag":632,"props":2828,"children":2829},{"style":835},[2830],{"type":33,"value":869},{"type":27,"tag":632,"props":2832,"children":2833},{"style":965},[2834],{"type":33,"value":968},{"type":27,"tag":632,"props":2836,"children":2837},{"style":852},[2838],{"type":33,"value":2103},{"type":27,"tag":632,"props":2840,"children":2841},{"class":634,"line":643},[2842,2847],{"type":27,"tag":632,"props":2843,"children":2844},{"style":835},[2845],{"type":33,"value":2846},"  try",{"type":27,"tag":632,"props":2848,"children":2849},{"style":852},[2850],{"type":33,"value":855},{"type":27,"tag":632,"props":2852,"children":2853},{"class":634,"line":652},[2854,2859,2863,2867],{"type":27,"tag":632,"props":2855,"children":2856},{"style":835},[2857],{"type":33,"value":2858},"    return",{"type":27,"tag":632,"props":2860,"children":2861},{"style":835},[2862],{"type":33,"value":2416},{"type":27,"tag":632,"props":2864,"children":2865},{"style":841},[2866],{"type":33,"value":1204},{"type":27,"tag":632,"props":2868,"children":2869},{"style":852},[2870],{"type":33,"value":1968},{"type":27,"tag":632,"props":2872,"children":2873},{"class":634,"line":661},[2874,2879,2884],{"type":27,"tag":632,"props":2875,"children":2876},{"style":852},[2877],{"type":33,"value":2878},"      task: ",{"type":27,"tag":632,"props":2880,"children":2881},{"style":872},[2882],{"type":33,"value":2883},"'summary'",{"type":27,"tag":632,"props":2885,"children":2886},{"style":852},[2887],{"type":33,"value":1748},{"type":27,"tag":632,"props":2889,"children":2890},{"class":634,"line":669},[2891,2896,2900],{"type":27,"tag":632,"props":2892,"children":2893},{"style":852},[2894],{"type":33,"value":2895},"      messages: [{ role: ",{"type":27,"tag":632,"props":2897,"children":2898},{"style":872},[2899],{"type":33,"value":1842},{"type":27,"tag":632,"props":2901,"children":2902},{"style":852},[2903],{"type":33,"value":2904},", content: text }],\n",{"type":27,"tag":632,"props":2906,"children":2907},{"class":634,"line":678},[2908],{"type":27,"tag":632,"props":2909,"children":2910},{"style":852},[2911],{"type":33,"value":2912},"    })\n",{"type":27,"tag":632,"props":2914,"children":2915},{"class":634,"line":686},[2916,2921,2926],{"type":27,"tag":632,"props":2917,"children":2918},{"style":852},[2919],{"type":33,"value":2920},"  } ",{"type":27,"tag":632,"props":2922,"children":2923},{"style":835},[2924],{"type":33,"value":2925},"catch",{"type":27,"tag":632,"props":2927,"children":2928},{"style":852},[2929],{"type":33,"value":855},{"type":27,"tag":632,"props":2931,"children":2932},{"class":634,"line":695},[2933,2937,2941,2946],{"type":27,"tag":632,"props":2934,"children":2935},{"style":835},[2936],{"type":33,"value":2858},{"type":27,"tag":632,"props":2938,"children":2939},{"style":835},[2940],{"type":33,"value":2416},{"type":27,"tag":632,"props":2942,"children":2943},{"style":841},[2944],{"type":33,"value":2945}," callBackupModel",{"type":27,"tag":632,"props":2947,"children":2948},{"style":852},[2949],{"type":33,"value":1968},{"type":27,"tag":632,"props":2951,"children":2952},{"class":634,"line":703},[2953,2957,2961],{"type":27,"tag":632,"props":2954,"children":2955},{"style":852},[2956],{"type":33,"value":2878},{"type":27,"tag":632,"props":2958,"children":2959},{"style":872},[2960],{"type":33,"value":2883},{"type":27,"tag":632,"props":2962,"children":2963},{"style":852},[2964],{"type":33,"value":1748},{"type":27,"tag":632,"props":2966,"children":2967},{"class":634,"line":1073},[2968,2972,2976],{"type":27,"tag":632,"props":2969,"children":2970},{"style":852},[2971],{"type":33,"value":2895},{"type":27,"tag":632,"props":2973,"children":2974},{"style":872},[2975],{"type":33,"value":1842},{"type":27,"tag":632,"props":2977,"children":2978},{"style":852},[2979],{"type":33,"value":2904},{"type":27,"tag":632,"props":2981,"children":2982},{"class":634,"line":1091},[2983],{"type":27,"tag":632,"props":2984,"children":2985},{"style":852},[2986],{"type":33,"value":2912},{"type":27,"tag":632,"props":2988,"children":2989},{"class":634,"line":1108},[2990],{"type":27,"tag":632,"props":2991,"children":2992},{"style":852},[2993],{"type":33,"value":1165},{"type":27,"tag":632,"props":2995,"children":2996},{"class":634,"line":1125},[2997],{"type":27,"tag":632,"props":2998,"children":2999},{"style":852},[3000],{"type":33,"value":1042},{"type":27,"tag":35,"props":3002,"children":3003},{},[3004],{"type":33,"value":3005},"fallback 的目标不是保证“完全同质量”，而是保证核心能力不要整体失效。",{"type":27,"tag":167,"props":3007,"children":3008},{},[],{"type":27,"tag":171,"props":3010,"children":3012},{"id":3011},"八观测体系如果你看不见-token时延和失败原因就不算真正接入完成",[3013],{"type":33,"value":3014},"八、观测体系：如果你看不见 token、时延和失败原因，就不算真正接入完成",{"type":27,"tag":35,"props":3016,"children":3017},{},[3018],{"type":33,"value":3019},"很多团队接好后，只有“成功 / 失败”日志。",{"type":27,"tag":35,"props":3021,"children":3022},{},[3023],{"type":33,"value":3024},"这远远不够。",{"type":27,"tag":251,"props":3026,"children":3028},{"id":3027},"_81-至少应该记录这些指标",[3029],{"type":33,"value":3030},"8.1 至少应该记录这些指标",{"type":27,"tag":315,"props":3032,"children":3033},{},[3034,3050],{"type":27,"tag":319,"props":3035,"children":3036},{},[3037],{"type":27,"tag":323,"props":3038,"children":3039},{},[3040,3045],{"type":27,"tag":327,"props":3041,"children":3042},{},[3043],{"type":33,"value":3044},"指标",{"type":27,"tag":327,"props":3046,"children":3047},{},[3048],{"type":33,"value":3049},"作用",{"type":27,"tag":348,"props":3051,"children":3052},{},[3053,3066,3079,3092,3105,3118,3131],{"type":27,"tag":323,"props":3054,"children":3055},{},[3056,3061],{"type":27,"tag":355,"props":3057,"children":3058},{},[3059],{"type":33,"value":3060},"请求量",{"type":27,"tag":355,"props":3062,"children":3063},{},[3064],{"type":33,"value":3065},"看使用规模",{"type":27,"tag":323,"props":3067,"children":3068},{},[3069,3074],{"type":27,"tag":355,"props":3070,"children":3071},{},[3072],{"type":33,"value":3073},"首 token 延迟",{"type":27,"tag":355,"props":3075,"children":3076},{},[3077],{"type":33,"value":3078},"看体验",{"type":27,"tag":323,"props":3080,"children":3081},{},[3082,3087],{"type":27,"tag":355,"props":3083,"children":3084},{},[3085],{"type":33,"value":3086},"总响应时间",{"type":27,"tag":355,"props":3088,"children":3089},{},[3090],{"type":33,"value":3091},"看吞吐与稳定性",{"type":27,"tag":323,"props":3093,"children":3094},{},[3095,3100],{"type":27,"tag":355,"props":3096,"children":3097},{},[3098],{"type":33,"value":3099},"prompt tokens / completion tokens",{"type":27,"tag":355,"props":3101,"children":3102},{},[3103],{"type":33,"value":3104},"看成本",{"type":27,"tag":323,"props":3106,"children":3107},{},[3108,3113],{"type":27,"tag":355,"props":3109,"children":3110},{},[3111],{"type":33,"value":3112},"结构化输出成功率",{"type":27,"tag":355,"props":3114,"children":3115},{},[3116],{"type":33,"value":3117},"看结果稳定性",{"type":27,"tag":323,"props":3119,"children":3120},{},[3121,3126],{"type":27,"tag":355,"props":3122,"children":3123},{},[3124],{"type":33,"value":3125},"provider 错误码",{"type":27,"tag":355,"props":3127,"children":3128},{},[3129],{"type":33,"value":3130},"看失败类型",{"type":27,"tag":323,"props":3132,"children":3133},{},[3134,3139],{"type":27,"tag":355,"props":3135,"children":3136},{},[3137],{"type":33,"value":3138},"fallback 触发率",{"type":27,"tag":355,"props":3140,"children":3141},{},[3142],{"type":33,"value":3143},"看主模型健康度",{"type":27,"tag":251,"props":3145,"children":3147},{"id":3146},"_82-为什么这组指标很关键",[3148],{"type":33,"value":3149},"8.2 为什么这组指标很关键",{"type":27,"tag":35,"props":3151,"children":3152},{},[3153],{"type":33,"value":3154},"因为 AI 产品优化时，很多问题不能只凭体感判断。",{"type":27,"tag":35,"props":3156,"children":3157},{},[3158],{"type":33,"value":1619},{"type":27,"tag":76,"props":3160,"children":3161},{},[3162,3167,3172,3177],{"type":27,"tag":45,"props":3163,"children":3164},{},[3165],{"type":33,"value":3166},"为什么最近费用变高了",{"type":27,"tag":45,"props":3168,"children":3169},{},[3170],{"type":33,"value":3171},"为什么某个功能感觉更慢",{"type":27,"tag":45,"props":3173,"children":3174},{},[3175],{"type":33,"value":3176},"为什么结构化结果出错更多",{"type":27,"tag":45,"props":3178,"children":3179},{},[3180],{"type":33,"value":3181},"为什么用户抱怨“经常卡住”",{"type":27,"tag":35,"props":3183,"children":3184},{},[3185],{"type":33,"value":3186},"这些都需要量化观察。",{"type":27,"tag":167,"props":3188,"children":3189},{},[],{"type":27,"tag":171,"props":3191,"children":3193},{"id":3192},"九把-llm-接到生产系统里还需要哪些工程补丁",[3194],{"type":33,"value":3195},"九、把 LLM 接到生产系统里，还需要哪些工程补丁",{"type":27,"tag":251,"props":3197,"children":3199},{"id":3198},"_91-内容审查与安全兜底",[3200],{"type":33,"value":3201},"9.1 内容审查与安全兜底",{"type":27,"tag":35,"props":3203,"children":3204},{},[3205],{"type":33,"value":3206},"即使不做复杂 AI 安全，至少也要：",{"type":27,"tag":76,"props":3208,"children":3209},{},[3210,3215,3220],{"type":27,"tag":45,"props":3211,"children":3212},{},[3213],{"type":33,"value":3214},"对输入长度做限制",{"type":27,"tag":45,"props":3216,"children":3217},{},[3218],{"type":33,"value":3219},"对输出做基础审查",{"type":27,"tag":45,"props":3221,"children":3222},{},[3223],{"type":33,"value":3224},"对敏感业务场景做兜底文案",{"type":27,"tag":251,"props":3226,"children":3228},{"id":3227},"_92-会话上下文管理",[3229],{"type":33,"value":3230},"9.2 会话上下文管理",{"type":27,"tag":35,"props":3232,"children":3233},{},[3234],{"type":33,"value":3235},"上下文越长，成本越高，响应越慢。",{"type":27,"tag":35,"props":3237,"children":3238},{},[3239],{"type":33,"value":3240},"所以要考虑：",{"type":27,"tag":76,"props":3242,"children":3243},{},[3244,3249,3254],{"type":27,"tag":45,"props":3245,"children":3246},{},[3247],{"type":33,"value":3248},"历史消息裁剪",{"type":27,"tag":45,"props":3250,"children":3251},{},[3252],{"type":33,"value":3253},"摘要压缩",{"type":27,"tag":45,"props":3255,"children":3256},{},[3257],{"type":33,"value":3258},"系统提示与用户消息分离",{"type":27,"tag":251,"props":3260,"children":3262},{"id":3261},"_93-可回放的调试能力",[3263],{"type":33,"value":3264},"9.3 可回放的调试能力",{"type":27,"tag":35,"props":3266,"children":3267},{},[3268],{"type":33,"value":3269},"遇到线上异常时，你最好能回放：",{"type":27,"tag":76,"props":3271,"children":3272},{},[3273,3278,3283,3288,3293],{"type":27,"tag":45,"props":3274,"children":3275},{},[3276],{"type":33,"value":3277},"用了哪个模型",{"type":27,"tag":45,"props":3279,"children":3280},{},[3281],{"type":33,"value":3282},"哪个 prompt 版本",{"type":27,"tag":45,"props":3284,"children":3285},{},[3286],{"type":33,"value":3287},"输入大概是什么",{"type":27,"tag":45,"props":3289,"children":3290},{},[3291],{"type":33,"value":3292},"token 用量如何",{"type":27,"tag":45,"props":3294,"children":3295},{},[3296],{"type":33,"value":3297},"为什么 fallback 了",{"type":27,"tag":35,"props":3299,"children":3300},{},[3301],{"type":33,"value":3302},"没有这些信息，AI 问题会比普通接口问题更难定位。",{"type":27,"tag":167,"props":3304,"children":3305},{},[],{"type":27,"tag":171,"props":3307,"children":3309},{"id":3308},"十一个团队级的接入顺序建议",[3310],{"type":33,"value":3311},"十、一个团队级的接入顺序建议",{"type":27,"tag":251,"props":3313,"children":3315},{"id":3314},"第-1-步先选一个低风险场景上线",[3316],{"type":33,"value":3317},"第 1 步：先选一个低风险场景上线",{"type":27,"tag":35,"props":3319,"children":3320},{},[3321],{"type":33,"value":284},{"type":27,"tag":76,"props":3323,"children":3324},{},[3325,3330,3335],{"type":27,"tag":45,"props":3326,"children":3327},{},[3328],{"type":33,"value":3329},"文本总结",{"type":27,"tag":45,"props":3331,"children":3332},{},[3333],{"type":33,"value":3334},"标签生成",{"type":27,"tag":45,"props":3336,"children":3337},{},[3338],{"type":33,"value":3339},"FAQ 初稿",{"type":27,"tag":35,"props":3341,"children":3342},{},[3343],{"type":33,"value":3344},"不要一上来就把 LLM 放进最核心、最严格实时的链路。",{"type":27,"tag":251,"props":3346,"children":3348},{"id":3347},"第-2-步统一调用层",[3349],{"type":33,"value":3350},"第 2 步：统一调用层",{"type":27,"tag":35,"props":3352,"children":3353},{},[3354],{"type":33,"value":3355},"不要让业务各自直连 provider。",{"type":27,"tag":251,"props":3357,"children":3359},{"id":3358},"第-3-步补结构化输出和校验",[3360],{"type":33,"value":3361},"第 3 步：补结构化输出和校验",{"type":27,"tag":35,"props":3363,"children":3364},{},[3365],{"type":33,"value":3366},"这一步能把 AI 功能从“看起来能用”升级到“系统能消费”。",{"type":27,"tag":251,"props":3368,"children":3370},{"id":3369},"第-4-步补流式体验和-fallback",[3371],{"type":33,"value":3372},"第 4 步：补流式体验和 fallback",{"type":27,"tag":35,"props":3374,"children":3375},{},[3376],{"type":33,"value":3377},"让体验和稳定性一起提升。",{"type":27,"tag":251,"props":3379,"children":3381},{"id":3380},"第-5-步把观测和成本统计纳入日常看板",[3382],{"type":33,"value":3383},"第 5 步：把观测和成本统计纳入日常看板",{"type":27,"tag":35,"props":3385,"children":3386},{},[3387],{"type":33,"value":3388},"否则你永远只能靠投诉发现问题。",{"type":27,"tag":167,"props":3390,"children":3391},{},[],{"type":27,"tag":171,"props":3393,"children":3395},{"id":3394},"十一给团队的-llm-api-集成检查清单",[3396],{"type":33,"value":3397},"十一、给团队的 LLM API 集成检查清单",{"type":27,"tag":251,"props":3399,"children":3401},{"id":3400},"模型与场景层",[3402],{"type":33,"value":3400},{"type":27,"tag":76,"props":3404,"children":3407},{"className":3405},[3406],"contains-task-list",[3408,3419,3428],{"type":27,"tag":45,"props":3409,"children":3412},{"className":3410},[3411],"task-list-item",[3413,3417],{"type":27,"tag":1214,"props":3414,"children":3416},{"disabled":18,"type":3415},"checkbox",[],{"type":33,"value":3418}," 是否先定义能力场景，再决定模型",{"type":27,"tag":45,"props":3420,"children":3422},{"className":3421},[3411],[3423,3426],{"type":27,"tag":1214,"props":3424,"children":3425},{"disabled":18,"type":3415},[],{"type":33,"value":3427}," 是否有默认模型和备用模型",{"type":27,"tag":45,"props":3429,"children":3431},{"className":3430},[3411],[3432,3435],{"type":27,"tag":1214,"props":3433,"children":3434},{"disabled":18,"type":3415},[],{"type":33,"value":3436}," 是否用自有样本做过评测",{"type":27,"tag":251,"props":3438,"children":3440},{"id":3439},"接入层",[3441],{"type":33,"value":3439},{"type":27,"tag":76,"props":3443,"children":3445},{"className":3444},[3406],[3446,3455,3464],{"type":27,"tag":45,"props":3447,"children":3449},{"className":3448},[3411],[3450,3453],{"type":27,"tag":1214,"props":3451,"children":3452},{"disabled":18,"type":3415},[],{"type":33,"value":3454}," 是否有统一 LLM Gateway",{"type":27,"tag":45,"props":3456,"children":3458},{"className":3457},[3411],[3459,3462],{"type":27,"tag":1214,"props":3460,"children":3461},{"disabled":18,"type":3415},[],{"type":33,"value":3463}," 是否统一处理超时、重试、fallback",{"type":27,"tag":45,"props":3465,"children":3467},{"className":3466},[3411],[3468,3471],{"type":27,"tag":1214,"props":3469,"children":3470},{"disabled":18,"type":3415},[],{"type":33,"value":3472}," 是否支持流式与非流式两种模式",{"type":27,"tag":251,"props":3474,"children":3476},{"id":3475},"输出层",[3477],{"type":33,"value":3475},{"type":27,"tag":76,"props":3479,"children":3481},{"className":3480},[3406],[3482,3491,3500],{"type":27,"tag":45,"props":3483,"children":3485},{"className":3484},[3411],[3486,3489],{"type":27,"tag":1214,"props":3487,"children":3488},{"disabled":18,"type":3415},[],{"type":33,"value":3490}," 是否对结构化输出做 schema 校验",{"type":27,"tag":45,"props":3492,"children":3494},{"className":3493},[3411],[3495,3498],{"type":27,"tag":1214,"props":3496,"children":3497},{"disabled":18,"type":3415},[],{"type":33,"value":3499}," 是否对 prompt 版本化管理",{"type":27,"tag":45,"props":3501,"children":3503},{"className":3502},[3411],[3504,3507],{"type":27,"tag":1214,"props":3505,"children":3506},{"disabled":18,"type":3415},[],{"type":33,"value":3508}," 是否能追踪模型、prompt、结果与错误",{"type":27,"tag":251,"props":3510,"children":3512},{"id":3511},"运营层",[3513],{"type":33,"value":3511},{"type":27,"tag":76,"props":3515,"children":3517},{"className":3516},[3406],[3518,3527,3536],{"type":27,"tag":45,"props":3519,"children":3521},{"className":3520},[3411],[3522,3525],{"type":27,"tag":1214,"props":3523,"children":3524},{"disabled":18,"type":3415},[],{"type":33,"value":3526}," 是否监控 token 成本和时延",{"type":27,"tag":45,"props":3528,"children":3530},{"className":3529},[3411],[3531,3534],{"type":27,"tag":1214,"props":3532,"children":3533},{"disabled":18,"type":3415},[],{"type":33,"value":3535}," 是否有限流和配额策略",{"type":27,"tag":45,"props":3537,"children":3539},{"className":3538},[3411],[3540,3543],{"type":27,"tag":1214,"props":3541,"children":3542},{"disabled":18,"type":3415},[],{"type":33,"value":3544}," 是否有异常兜底和降级预案",{"type":27,"tag":167,"props":3546,"children":3547},{},[],{"type":27,"tag":171,"props":3549,"children":3551},{"id":3550},"总结",[3552],{"type":33,"value":3550},{"type":27,"tag":35,"props":3554,"children":3555},{},[3556],{"type":33,"value":3557},"把 LLM API 集成讲透，可以收敛成 5 句话：",{"type":27,"tag":41,"props":3559,"children":3560},{},[3561,3569,3577,3585,3593],{"type":27,"tag":45,"props":3562,"children":3563},{},[3564],{"type":27,"tag":212,"props":3565,"children":3566},{},[3567],{"type":33,"value":3568},"你接入的不是某个模型，而是一种产品能力。",{"type":27,"tag":45,"props":3570,"children":3571},{},[3572],{"type":27,"tag":212,"props":3573,"children":3574},{},[3575],{"type":33,"value":3576},"模型选型要同时看质量、时延、成本和稳定性。",{"type":27,"tag":45,"props":3578,"children":3579},{},[3580],{"type":27,"tag":212,"props":3581,"children":3582},{},[3583],{"type":33,"value":3584},"统一 Gateway 比每个业务自己直连 provider 更重要。",{"type":27,"tag":45,"props":3586,"children":3587},{},[3588],{"type":27,"tag":212,"props":3589,"children":3590},{},[3591],{"type":33,"value":3592},"结构化输出、流式响应、fallback 和观测，决定它能不能上线。",{"type":27,"tag":45,"props":3594,"children":3595},{},[3596],{"type":27,"tag":212,"props":3597,"children":3598},{},[3599],{"type":33,"value":3600},"AI 集成的终点不是“能回答”，而是“可控、可测、可迭代”。",{"type":27,"tag":35,"props":3602,"children":3603},{},[3604],{"type":33,"value":3605},"如果你只记住一句话，我希望是这一句：",{"type":27,"tag":115,"props":3607,"children":3608},{},[3609],{"type":27,"tag":35,"props":3610,"children":3611},{},[3612],{"type":33,"value":3613},"一个真正能落地的 LLM 集成，不是把模型接进代码，而是把不确定性关进系统边界里。",{"type":27,"tag":35,"props":3615,"children":3616},{},[3617],{"type":33,"value":3618},"否则 demo 一跑很惊艳，上线之后最先学会的通常不是智能，而是——",{"type":27,"tag":35,"props":3620,"children":3621},{},[3622],{"type":27,"tag":212,"props":3623,"children":3624},{},[3625],{"type":33,"value":3626},"超时。",{"type":27,"tag":3628,"props":3629,"children":3630},"style",{},[3631],{"type":33,"value":3632},"html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}",{"title":7,"searchDepth":652,"depth":652,"links":3634},[3635,3639,3644,3648,3653,3657,3662,3667,3671,3676,3683,3689],{"id":173,"depth":643,"text":176,"children":3636},[3637,3638],{"id":253,"depth":652,"text":256},{"id":305,"depth":652,"text":308},{"id":454,"depth":643,"text":457,"children":3640},[3641,3642,3643],{"id":460,"depth":652,"text":463},{"id":583,"depth":652,"text":586},{"id":617,"depth":652,"text":620},{"id":720,"depth":643,"text":723,"children":3645},[3646,3647],{"id":769,"depth":652,"text":772},{"id":817,"depth":652,"text":820},{"id":1443,"depth":643,"text":1446,"children":3649},[3650,3651,3652],{"id":1472,"depth":652,"text":1475},{"id":1567,"depth":652,"text":1570},{"id":1606,"depth":652,"text":1609},{"id":1660,"depth":643,"text":1663,"children":3654},[3655,3656],{"id":1699,"depth":652,"text":1702},{"id":1887,"depth":652,"text":1890},{"id":2196,"depth":643,"text":2199,"children":3658},[3659,3660,3661],{"id":2207,"depth":652,"text":2210},{"id":2242,"depth":652,"text":2245},{"id":2604,"depth":652,"text":2607},{"id":2636,"depth":643,"text":2639,"children":3663},[3664,3665,3666],{"id":2675,"depth":652,"text":2678},{"id":2709,"depth":652,"text":2712},{"id":2788,"depth":652,"text":2791},{"id":3011,"depth":643,"text":3014,"children":3668},[3669,3670],{"id":3027,"depth":652,"text":3030},{"id":3146,"depth":652,"text":3149},{"id":3192,"depth":643,"text":3195,"children":3672},[3673,3674,3675],{"id":3198,"depth":652,"text":3201},{"id":3227,"depth":652,"text":3230},{"id":3261,"depth":652,"text":3264},{"id":3308,"depth":643,"text":3311,"children":3677},[3678,3679,3680,3681,3682],{"id":3314,"depth":652,"text":3317},{"id":3347,"depth":652,"text":3350},{"id":3358,"depth":652,"text":3361},{"id":3369,"depth":652,"text":3372},{"id":3380,"depth":652,"text":3383},{"id":3394,"depth":643,"text":3397,"children":3684},[3685,3686,3687,3688],{"id":3400,"depth":652,"text":3400},{"id":3439,"depth":652,"text":3439},{"id":3475,"depth":652,"text":3475},{"id":3511,"depth":652,"text":3511},{"id":3550,"depth":643,"text":3550},"markdown","content:topics:engineering:llm-api-integration-complete-guide.md","topics/engineering/llm-api-integration-complete-guide.md","topics/engineering/llm-api-integration-complete-guide","md",1777109947922]