[{"data":1,"prerenderedAt":3474},["ShallowReactive",2],{"content-/topics/engineering/token-management-cost-control-guide":3},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":7,"title":8,"description":9,"date":10,"category":5,"tags":11,"author":17,"featured":18,"series":19,"seriesOrder":20,"readingTime":21,"image":22,"body":23,"_type":3469,"_id":3470,"_source":1178,"_file":3471,"_stem":3472,"_extension":3473},"/topics/engineering/token-management-cost-control-guide","engineering",false,"","Token 管理与成本控制完全指南：别让 AI 账单悄悄长成事故","做 AI 产品时，成本失控往往不是一夜之间发生的，而是被长上下文、重复调用、无效输出和缺少观测一点点堆出来的。本文系统讲清 token 预算、计费估算、上下文裁剪、缓存复用、模型分层与成本告警。","2026-03-08",[12,13,14,15,16],"Token管理","成本控制","AI工程","LLM","FinOps","小明",true,"ai-integration-and-intelligent-applications",2,17,"/images/articles/token-management-cost-control-guide-cover.jpg",{"type":24,"children":25,"toc":3399},"root",[26,34,40,60,65,70,103,108,113,126,131,136,165,170,174,181,186,199,204,232,239,244,267,272,278,283,294,299,304,327,330,336,341,346,352,432,438,443,466,472,798,803,806,812,817,823,830,835,853,859,864,882,887,893,898,921,927,932,950,955,961,1048,1053,1056,1062,1068,1073,1078,1101,1107,1506,1511,1522,1528,1533,1538,1561,1566,1569,1575,1580,1586,1590,1664,1670,1675,1693,1698,1704,1901,1913,1916,1922,1927,1933,1961,1967,1985,1991,2274,2279,2297,2302,2305,2311,2317,2340,2346,2352,2357,2363,2368,2489,2494,2497,2503,2509,2570,2576,2594,2600,2605,2628,2633,2636,2642,2647,2670,2676,2759,2765,2944,2949,2972,2975,2981,2986,2991,2997,3020,3026,3031,3039,3044,3067,3072,3077,3100,3103,3109,3115,3120,3126,3131,3137,3142,3148,3153,3159,3164,3167,3173,3178,3212,3217,3266,3271,3311,3314,3319,3324,3367,3372,3380,3385,3393],{"type":27,"tag":28,"props":29,"children":31},"element","h1",{"id":30},"token-管理与成本控制完全指南别让-ai-账单悄悄长成事故",[32],{"type":33,"value":8},"text",{"type":27,"tag":35,"props":36,"children":37},"p",{},[38],{"type":33,"value":39},"AI 产品早期有一种很容易让人上头的错觉：",{"type":27,"tag":41,"props":42,"children":43},"ul",{},[44,50,55],{"type":27,"tag":45,"props":46,"children":47},"li",{},[48],{"type":33,"value":49},"一个请求也就几分钱",{"type":27,"tag":45,"props":51,"children":52},{},[53],{"type":33,"value":54},"用户还没那么多",{"type":27,"tag":45,"props":56,"children":57},{},[58],{"type":33,"value":59},"先把体验做出来，成本以后再说",{"type":27,"tag":35,"props":61,"children":62},{},[63],{"type":33,"value":64},"这句话在 demo 阶段没问题。",{"type":27,"tag":35,"props":66,"children":67},{},[68],{"type":33,"value":69},"一旦功能开始被真实用户持续使用，成本会以一种很不戏剧化、但很致命的方式上涨：",{"type":27,"tag":41,"props":71,"children":72},{},[73,78,83,88,93,98],{"type":27,"tag":45,"props":74,"children":75},{},[76],{"type":33,"value":77},"prompt 越写越长",{"type":27,"tag":45,"props":79,"children":80},{},[81],{"type":33,"value":82},"上下文一轮轮累加",{"type":27,"tag":45,"props":84,"children":85},{},[86],{"type":33,"value":87},"同一个用户问题反复重试",{"type":27,"tag":45,"props":89,"children":90},{},[91],{"type":33,"value":92},"输出明明只用 100 字，却让模型生成 800 字",{"type":27,"tag":45,"props":94,"children":95},{},[96],{"type":33,"value":97},"分类、总结、改写、抽取分别调了四次模型",{"type":27,"tag":45,"props":99,"children":100},{},[101],{"type":33,"value":102},"没人看 token 用量，也没人知道钱到底花在哪",{"type":27,"tag":35,"props":104,"children":105},{},[106],{"type":33,"value":107},"最后到月底，财务给你发一张账单，大家才第一次认真打开监控后台。",{"type":27,"tag":35,"props":109,"children":110},{},[111],{"type":33,"value":112},"这时候往往已经不是“优化一下就好”的程度，而是：",{"type":27,"tag":114,"props":115,"children":116},"blockquote",{},[117],{"type":27,"tag":35,"props":118,"children":119},{},[120],{"type":27,"tag":121,"props":122,"children":123},"strong",{},[124],{"type":33,"value":125},"AI 成本开始反噬产品模型。",{"type":27,"tag":35,"props":127,"children":128},{},[129],{"type":33,"value":130},"所以 Token 管理和成本控制，不是上线后的收尾工作，而是 AI 工程的核心部分。",{"type":27,"tag":35,"props":132,"children":133},{},[134],{"type":33,"value":135},"这篇文章就围绕这个主题展开：",{"type":27,"tag":137,"props":138,"children":139},"ol",{},[140,145,150,155,160],{"type":27,"tag":45,"props":141,"children":142},{},[143],{"type":33,"value":144},"token 成本为什么总比团队预估高",{"type":27,"tag":45,"props":146,"children":147},{},[148],{"type":33,"value":149},"怎样建立单位请求、单位用户、单位功能的成本视角",{"type":27,"tag":45,"props":151,"children":152},{},[153],{"type":33,"value":154},"如何通过上下文裁剪、缓存复用、模型分层和输出约束降低成本",{"type":27,"tag":45,"props":156,"children":157},{},[158],{"type":33,"value":159},"怎样做预算、告警和成本归因",{"type":27,"tag":45,"props":161,"children":162},{},[163],{"type":33,"value":164},"什么时候该追求更低成本，什么时候不该为了省钱把体验做坏",{"type":27,"tag":35,"props":166,"children":167},{},[168],{"type":33,"value":169},"如果你已经开始做 AI 功能，这篇越早看越值钱。",{"type":27,"tag":171,"props":172,"children":173},"hr",{},[],{"type":27,"tag":175,"props":176,"children":178},"h2",{"id":177},"一先认清现实ai-成本失控大多不是因为模型贵而是因为调用方式浪费",[179],{"type":33,"value":180},"一、先认清现实：AI 成本失控，大多不是因为模型贵，而是因为调用方式浪费",{"type":27,"tag":35,"props":182,"children":183},{},[184],{"type":33,"value":185},"很多团队第一次看到账单，第一反应是：",{"type":27,"tag":41,"props":187,"children":188},{},[189,194],{"type":27,"tag":45,"props":190,"children":191},{},[192],{"type":33,"value":193},"这个模型太贵了",{"type":27,"tag":45,"props":195,"children":196},{},[197],{"type":33,"value":198},"换便宜模型吧",{"type":27,"tag":35,"props":200,"children":201},{},[202],{"type":33,"value":203},"模型单价当然重要，但多数成本问题，真正的源头是：",{"type":27,"tag":41,"props":205,"children":206},{},[207,212,217,222,227],{"type":27,"tag":45,"props":208,"children":209},{},[210],{"type":33,"value":211},"不必要的上下文太长",{"type":27,"tag":45,"props":213,"children":214},{},[215],{"type":33,"value":216},"重复调用太多",{"type":27,"tag":45,"props":218,"children":219},{},[220],{"type":33,"value":221},"输出太啰嗦",{"type":27,"tag":45,"props":223,"children":224},{},[225],{"type":33,"value":226},"没有做结果复用",{"type":27,"tag":45,"props":228,"children":229},{},[230],{"type":33,"value":231},"没有分场景选模型",{"type":27,"tag":233,"props":234,"children":236},"h3",{"id":235},"_11-一个很典型的浪费例子",[237],{"type":33,"value":238},"1.1 一个很典型的浪费例子",{"type":27,"tag":35,"props":240,"children":241},{},[242],{"type":33,"value":243},"假设用户想要一个 3 行摘要，但你的系统做了这些事：",{"type":27,"tag":137,"props":245,"children":246},{},[247,252,257,262],{"type":27,"tag":45,"props":248,"children":249},{},[250],{"type":33,"value":251},"把最近 20 条历史对话都带上",{"type":27,"tag":45,"props":253,"children":254},{},[255],{"type":33,"value":256},"再附上大段系统说明",{"type":27,"tag":45,"props":258,"children":259},{},[260],{"type":33,"value":261},"输出要求没限制，模型生成 700 token",{"type":27,"tag":45,"props":263,"children":264},{},[265],{"type":33,"value":266},"结果格式不稳定，又重试一次",{"type":27,"tag":35,"props":268,"children":269},{},[270],{"type":33,"value":271},"最后为了一个很简单的功能，成本可能放大到原本的 5~10 倍。",{"type":27,"tag":233,"props":273,"children":275},{"id":274},"_12-成本问题的本质",[276],{"type":33,"value":277},"1.2 成本问题的本质",{"type":27,"tag":35,"props":279,"children":280},{},[281],{"type":33,"value":282},"成本控制最核心的不是“省钱”本身，而是：",{"type":27,"tag":114,"props":284,"children":285},{},[286],{"type":27,"tag":35,"props":287,"children":288},{},[289],{"type":27,"tag":121,"props":290,"children":291},{},[292],{"type":33,"value":293},"让每一份 token 支出都对应到真实业务价值。",{"type":27,"tag":35,"props":295,"children":296},{},[297],{"type":33,"value":298},"如果某个功能很贵，但能显著提高转化或续费，它未必有问题。",{"type":27,"tag":35,"props":300,"children":301},{},[302],{"type":33,"value":303},"真正有问题的是：",{"type":27,"tag":41,"props":305,"children":306},{},[307,312,317,322],{"type":27,"tag":45,"props":308,"children":309},{},[310],{"type":33,"value":311},"花了很多 token",{"type":27,"tag":45,"props":313,"children":314},{},[315],{"type":33,"value":316},"用户没有明显感知",{"type":27,"tag":45,"props":318,"children":319},{},[320],{"type":33,"value":321},"业务结果没有改善",{"type":27,"tag":45,"props":323,"children":324},{},[325],{"type":33,"value":326},"团队还不知道钱花去了哪里",{"type":27,"tag":171,"props":328,"children":329},{},[],{"type":27,"tag":175,"props":331,"children":333},{"id":332},"二先建立单位成本视角不要只看月账单",[334],{"type":33,"value":335},"二、先建立单位成本视角：不要只看月账单",{"type":27,"tag":35,"props":337,"children":338},{},[339],{"type":33,"value":340},"月账单是结果，不是分析入口。",{"type":27,"tag":35,"props":342,"children":343},{},[344],{"type":33,"value":345},"更有用的做法，是把成本拆到多个粒度：",{"type":27,"tag":233,"props":347,"children":349},{"id":348},"_21-四层成本视角",[350],{"type":33,"value":351},"2.1 四层成本视角",{"type":27,"tag":353,"props":354,"children":355},"table",{},[356,375],{"type":27,"tag":357,"props":358,"children":359},"thead",{},[360],{"type":27,"tag":361,"props":362,"children":363},"tr",{},[364,370],{"type":27,"tag":365,"props":366,"children":367},"th",{},[368],{"type":33,"value":369},"粒度",{"type":27,"tag":365,"props":371,"children":372},{},[373],{"type":33,"value":374},"要回答的问题",{"type":27,"tag":376,"props":377,"children":378},"tbody",{},[379,393,406,419],{"type":27,"tag":361,"props":380,"children":381},{},[382,388],{"type":27,"tag":383,"props":384,"children":385},"td",{},[386],{"type":33,"value":387},"单次调用",{"type":27,"tag":383,"props":389,"children":390},{},[391],{"type":33,"value":392},"一个请求多少钱",{"type":27,"tag":361,"props":394,"children":395},{},[396,401],{"type":27,"tag":383,"props":397,"children":398},{},[399],{"type":33,"value":400},"单个功能",{"type":27,"tag":383,"props":402,"children":403},{},[404],{"type":33,"value":405},"总结、问答、抽取分别多少钱",{"type":27,"tag":361,"props":407,"children":408},{},[409,414],{"type":27,"tag":383,"props":410,"children":411},{},[412],{"type":33,"value":413},"单用户",{"type":27,"tag":383,"props":415,"children":416},{},[417],{"type":33,"value":418},"一个活跃用户平均消耗多少",{"type":27,"tag":361,"props":420,"children":421},{},[422,427],{"type":27,"tag":383,"props":423,"children":424},{},[425],{"type":33,"value":426},"单业务结果",{"type":27,"tag":383,"props":428,"children":429},{},[430],{"type":33,"value":431},"每完成一次转化或一次成功任务，成本是多少",{"type":27,"tag":233,"props":433,"children":435},{"id":434},"_22-为什么这很重要",[436],{"type":33,"value":437},"2.2 为什么这很重要",{"type":27,"tag":35,"props":439,"children":440},{},[441],{"type":33,"value":442},"因为只有这样你才能判断：",{"type":27,"tag":41,"props":444,"children":445},{},[446,451,456,461],{"type":27,"tag":45,"props":447,"children":448},{},[449],{"type":33,"value":450},"哪个功能最烧钱",{"type":27,"tag":45,"props":452,"children":453},{},[454],{"type":33,"value":455},"哪类用户最贵",{"type":27,"tag":45,"props":457,"children":458},{},[459],{"type":33,"value":460},"是输入成本高，还是输出成本高",{"type":27,"tag":45,"props":462,"children":463},{},[464],{"type":33,"value":465},"某次版本更新是否让成本突然上涨",{"type":27,"tag":233,"props":467,"children":469},{"id":468},"_23-一个简单的成本估算函数",[470],{"type":33,"value":471},"2.3 一个简单的成本估算函数",{"type":27,"tag":473,"props":474,"children":478},"pre",{"className":475,"code":476,"language":477,"meta":7,"style":7},"language-ts shiki shiki-themes github-dark","type Usage = {\n  promptTokens: number\n  completionTokens: number\n}\n\ntype Pricing = {\n  inputPer1k: number\n  outputPer1k: number\n}\n\nexport function estimateCost(usage: Usage, pricing: Pricing) {\n  return (\n    (usage.promptTokens / 1000) * pricing.inputPer1k +\n    (usage.completionTokens / 1000) * pricing.outputPer1k\n  )\n}\n","ts",[479],{"type":27,"tag":480,"props":481,"children":482},"code",{"__ignoreMap":7},[483,512,532,549,558,567,588,605,622,630,638,698,712,751,781,790],{"type":27,"tag":484,"props":485,"children":488},"span",{"class":486,"line":487},"line",1,[489,495,501,506],{"type":27,"tag":484,"props":490,"children":492},{"style":491},"--shiki-default:#F97583",[493],{"type":33,"value":494},"type",{"type":27,"tag":484,"props":496,"children":498},{"style":497},"--shiki-default:#B392F0",[499],{"type":33,"value":500}," Usage",{"type":27,"tag":484,"props":502,"children":503},{"style":491},[504],{"type":33,"value":505}," =",{"type":27,"tag":484,"props":507,"children":509},{"style":508},"--shiki-default:#E1E4E8",[510],{"type":33,"value":511}," {\n",{"type":27,"tag":484,"props":513,"children":514},{"class":486,"line":20},[515,521,526],{"type":27,"tag":484,"props":516,"children":518},{"style":517},"--shiki-default:#FFAB70",[519],{"type":33,"value":520},"  promptTokens",{"type":27,"tag":484,"props":522,"children":523},{"style":491},[524],{"type":33,"value":525},":",{"type":27,"tag":484,"props":527,"children":529},{"style":528},"--shiki-default:#79B8FF",[530],{"type":33,"value":531}," number\n",{"type":27,"tag":484,"props":533,"children":535},{"class":486,"line":534},3,[536,541,545],{"type":27,"tag":484,"props":537,"children":538},{"style":517},[539],{"type":33,"value":540},"  completionTokens",{"type":27,"tag":484,"props":542,"children":543},{"style":491},[544],{"type":33,"value":525},{"type":27,"tag":484,"props":546,"children":547},{"style":528},[548],{"type":33,"value":531},{"type":27,"tag":484,"props":550,"children":552},{"class":486,"line":551},4,[553],{"type":27,"tag":484,"props":554,"children":555},{"style":508},[556],{"type":33,"value":557},"}\n",{"type":27,"tag":484,"props":559,"children":561},{"class":486,"line":560},5,[562],{"type":27,"tag":484,"props":563,"children":564},{"emptyLinePlaceholder":18},[565],{"type":33,"value":566},"\n",{"type":27,"tag":484,"props":568,"children":570},{"class":486,"line":569},6,[571,575,580,584],{"type":27,"tag":484,"props":572,"children":573},{"style":491},[574],{"type":33,"value":494},{"type":27,"tag":484,"props":576,"children":577},{"style":497},[578],{"type":33,"value":579}," Pricing",{"type":27,"tag":484,"props":581,"children":582},{"style":491},[583],{"type":33,"value":505},{"type":27,"tag":484,"props":585,"children":586},{"style":508},[587],{"type":33,"value":511},{"type":27,"tag":484,"props":589,"children":591},{"class":486,"line":590},7,[592,597,601],{"type":27,"tag":484,"props":593,"children":594},{"style":517},[595],{"type":33,"value":596},"  inputPer1k",{"type":27,"tag":484,"props":598,"children":599},{"style":491},[600],{"type":33,"value":525},{"type":27,"tag":484,"props":602,"children":603},{"style":528},[604],{"type":33,"value":531},{"type":27,"tag":484,"props":606,"children":608},{"class":486,"line":607},8,[609,614,618],{"type":27,"tag":484,"props":610,"children":611},{"style":517},[612],{"type":33,"value":613},"  outputPer1k",{"type":27,"tag":484,"props":615,"children":616},{"style":491},[617],{"type":33,"value":525},{"type":27,"tag":484,"props":619,"children":620},{"style":528},[621],{"type":33,"value":531},{"type":27,"tag":484,"props":623,"children":625},{"class":486,"line":624},9,[626],{"type":27,"tag":484,"props":627,"children":628},{"style":508},[629],{"type":33,"value":557},{"type":27,"tag":484,"props":631,"children":633},{"class":486,"line":632},10,[634],{"type":27,"tag":484,"props":635,"children":636},{"emptyLinePlaceholder":18},[637],{"type":33,"value":566},{"type":27,"tag":484,"props":639,"children":641},{"class":486,"line":640},11,[642,647,652,657,662,667,671,675,680,685,689,693],{"type":27,"tag":484,"props":643,"children":644},{"style":491},[645],{"type":33,"value":646},"export",{"type":27,"tag":484,"props":648,"children":649},{"style":491},[650],{"type":33,"value":651}," function",{"type":27,"tag":484,"props":653,"children":654},{"style":497},[655],{"type":33,"value":656}," estimateCost",{"type":27,"tag":484,"props":658,"children":659},{"style":508},[660],{"type":33,"value":661},"(",{"type":27,"tag":484,"props":663,"children":664},{"style":517},[665],{"type":33,"value":666},"usage",{"type":27,"tag":484,"props":668,"children":669},{"style":491},[670],{"type":33,"value":525},{"type":27,"tag":484,"props":672,"children":673},{"style":497},[674],{"type":33,"value":500},{"type":27,"tag":484,"props":676,"children":677},{"style":508},[678],{"type":33,"value":679},", ",{"type":27,"tag":484,"props":681,"children":682},{"style":517},[683],{"type":33,"value":684},"pricing",{"type":27,"tag":484,"props":686,"children":687},{"style":491},[688],{"type":33,"value":525},{"type":27,"tag":484,"props":690,"children":691},{"style":497},[692],{"type":33,"value":579},{"type":27,"tag":484,"props":694,"children":695},{"style":508},[696],{"type":33,"value":697},") {\n",{"type":27,"tag":484,"props":699,"children":701},{"class":486,"line":700},12,[702,707],{"type":27,"tag":484,"props":703,"children":704},{"style":491},[705],{"type":33,"value":706},"  return",{"type":27,"tag":484,"props":708,"children":709},{"style":508},[710],{"type":33,"value":711}," (\n",{"type":27,"tag":484,"props":713,"children":715},{"class":486,"line":714},13,[716,721,726,731,736,741,746],{"type":27,"tag":484,"props":717,"children":718},{"style":508},[719],{"type":33,"value":720},"    (usage.promptTokens ",{"type":27,"tag":484,"props":722,"children":723},{"style":491},[724],{"type":33,"value":725},"/",{"type":27,"tag":484,"props":727,"children":728},{"style":528},[729],{"type":33,"value":730}," 1000",{"type":27,"tag":484,"props":732,"children":733},{"style":508},[734],{"type":33,"value":735},") ",{"type":27,"tag":484,"props":737,"children":738},{"style":491},[739],{"type":33,"value":740},"*",{"type":27,"tag":484,"props":742,"children":743},{"style":508},[744],{"type":33,"value":745}," pricing.inputPer1k ",{"type":27,"tag":484,"props":747,"children":748},{"style":491},[749],{"type":33,"value":750},"+\n",{"type":27,"tag":484,"props":752,"children":754},{"class":486,"line":753},14,[755,760,764,768,772,776],{"type":27,"tag":484,"props":756,"children":757},{"style":508},[758],{"type":33,"value":759},"    (usage.completionTokens ",{"type":27,"tag":484,"props":761,"children":762},{"style":491},[763],{"type":33,"value":725},{"type":27,"tag":484,"props":765,"children":766},{"style":528},[767],{"type":33,"value":730},{"type":27,"tag":484,"props":769,"children":770},{"style":508},[771],{"type":33,"value":735},{"type":27,"tag":484,"props":773,"children":774},{"style":491},[775],{"type":33,"value":740},{"type":27,"tag":484,"props":777,"children":778},{"style":508},[779],{"type":33,"value":780}," pricing.outputPer1k\n",{"type":27,"tag":484,"props":782,"children":784},{"class":486,"line":783},15,[785],{"type":27,"tag":484,"props":786,"children":787},{"style":508},[788],{"type":33,"value":789},"  )\n",{"type":27,"tag":484,"props":791,"children":793},{"class":486,"line":792},16,[794],{"type":27,"tag":484,"props":795,"children":796},{"style":508},[797],{"type":33,"value":557},{"type":27,"tag":35,"props":799,"children":800},{},[801],{"type":33,"value":802},"这个函数不复杂，但非常值得统一收口到调用层。因为后面所有的预算、归因、告警，都是从这里长出来的。",{"type":27,"tag":171,"props":804,"children":805},{},[],{"type":27,"tag":175,"props":807,"children":809},{"id":808},"三你真正要盯的不只是-token-总量而是-token-结构",[810],{"type":33,"value":811},"三、你真正要盯的，不只是 token 总量，而是 token 结构",{"type":27,"tag":35,"props":813,"children":814},{},[815],{"type":33,"value":816},"一个请求贵，可能贵在完全不同的地方。",{"type":27,"tag":233,"props":818,"children":820},{"id":819},"_31-常见的四种成本结构",[821],{"type":33,"value":822},"3.1 常见的四种成本结构",{"type":27,"tag":824,"props":825,"children":827},"h4",{"id":826},"a-输入太长",[828],{"type":33,"value":829},"A. 输入太长",{"type":27,"tag":35,"props":831,"children":832},{},[833],{"type":33,"value":834},"最常见原因：",{"type":27,"tag":41,"props":836,"children":837},{},[838,843,848],{"type":27,"tag":45,"props":839,"children":840},{},[841],{"type":33,"value":842},"把整段上下文原样传给模型",{"type":27,"tag":45,"props":844,"children":845},{},[846],{"type":33,"value":847},"系统提示写得又长又重复",{"type":27,"tag":45,"props":849,"children":850},{},[851],{"type":33,"value":852},"历史会话不做裁剪",{"type":27,"tag":824,"props":854,"children":856},{"id":855},"b-输出太长",[857],{"type":33,"value":858},"B. 输出太长",{"type":27,"tag":35,"props":860,"children":861},{},[862],{"type":33,"value":863},"很多场景其实只要：",{"type":27,"tag":41,"props":865,"children":866},{},[867,872,877],{"type":27,"tag":45,"props":868,"children":869},{},[870],{"type":33,"value":871},"分类标签",{"type":27,"tag":45,"props":873,"children":874},{},[875],{"type":33,"value":876},"3 条摘要",{"type":27,"tag":45,"props":878,"children":879},{},[880],{"type":33,"value":881},"一段简短回复",{"type":27,"tag":35,"props":883,"children":884},{},[885],{"type":33,"value":886},"结果系统没有约束，模型就会“热情发挥”。",{"type":27,"tag":824,"props":888,"children":890},{"id":889},"c-同一请求被多次调用",[891],{"type":33,"value":892},"C. 同一请求被多次调用",{"type":27,"tag":35,"props":894,"children":895},{},[896],{"type":33,"value":897},"例如：",{"type":27,"tag":41,"props":899,"children":900},{},[901,906,911,916],{"type":27,"tag":45,"props":902,"children":903},{},[904],{"type":33,"value":905},"失败重试",{"type":27,"tag":45,"props":907,"children":908},{},[909],{"type":33,"value":910},"流式中断后重新发起",{"type":27,"tag":45,"props":912,"children":913},{},[914],{"type":33,"value":915},"前端重复提交",{"type":27,"tag":45,"props":917,"children":918},{},[919],{"type":33,"value":920},"后端多个模块分别调模型做相似任务",{"type":27,"tag":824,"props":922,"children":924},{"id":923},"d-同一结果没有复用",[925],{"type":33,"value":926},"D. 同一结果没有复用",{"type":27,"tag":35,"props":928,"children":929},{},[930],{"type":33,"value":931},"像：",{"type":27,"tag":41,"props":933,"children":934},{},[935,940,945],{"type":27,"tag":45,"props":936,"children":937},{},[938],{"type":33,"value":939},"热门内容摘要",{"type":27,"tag":45,"props":941,"children":942},{},[943],{"type":33,"value":944},"公共问答结果",{"type":27,"tag":45,"props":946,"children":947},{},[948],{"type":33,"value":949},"标准化标签提取",{"type":27,"tag":35,"props":951,"children":952},{},[953],{"type":33,"value":954},"这些结果如果每次都重算，成本会平白增加。",{"type":27,"tag":233,"props":956,"children":958},{"id":957},"_32-一个更有意义的观测表",[959],{"type":33,"value":960},"3.2 一个更有意义的观测表",{"type":27,"tag":353,"props":962,"children":963},{},[964,980],{"type":27,"tag":357,"props":965,"children":966},{},[967],{"type":27,"tag":361,"props":968,"children":969},{},[970,975],{"type":27,"tag":365,"props":971,"children":972},{},[973],{"type":33,"value":974},"指标",{"type":27,"tag":365,"props":976,"children":977},{},[978],{"type":33,"value":979},"含义",{"type":27,"tag":376,"props":981,"children":982},{},[983,996,1009,1022,1035],{"type":27,"tag":361,"props":984,"children":985},{},[986,991],{"type":27,"tag":383,"props":987,"children":988},{},[989],{"type":33,"value":990},"平均输入 token",{"type":27,"tag":383,"props":992,"children":993},{},[994],{"type":33,"value":995},"prompt 是否膨胀",{"type":27,"tag":361,"props":997,"children":998},{},[999,1004],{"type":27,"tag":383,"props":1000,"children":1001},{},[1002],{"type":33,"value":1003},"平均输出 token",{"type":27,"tag":383,"props":1005,"children":1006},{},[1007],{"type":33,"value":1008},"输出是否失控",{"type":27,"tag":361,"props":1010,"children":1011},{},[1012,1017],{"type":27,"tag":383,"props":1013,"children":1014},{},[1015],{"type":33,"value":1016},"单次请求重试率",{"type":27,"tag":383,"props":1018,"children":1019},{},[1020],{"type":33,"value":1021},"稳定性是否拖累成本",{"type":27,"tag":361,"props":1023,"children":1024},{},[1025,1030],{"type":27,"tag":383,"props":1026,"children":1027},{},[1028],{"type":33,"value":1029},"缓存命中率",{"type":27,"tag":383,"props":1031,"children":1032},{},[1033],{"type":33,"value":1034},"是否复用已有结果",{"type":27,"tag":361,"props":1036,"children":1037},{},[1038,1043],{"type":27,"tag":383,"props":1039,"children":1040},{},[1041],{"type":33,"value":1042},"长上下文请求占比",{"type":27,"tag":383,"props":1044,"children":1045},{},[1046],{"type":33,"value":1047},"是否需要上下文治理",{"type":27,"tag":35,"props":1049,"children":1050},{},[1051],{"type":33,"value":1052},"这组指标通常比“本月花了多少钱”更能指导优化动作。",{"type":27,"tag":171,"props":1054,"children":1055},{},[],{"type":27,"tag":175,"props":1057,"children":1059},{"id":1058},"四最值钱的降本手段通常不是换模型而是少传少调少生成",[1060],{"type":33,"value":1061},"四、最值钱的降本手段，通常不是换模型，而是少传、少调、少生成",{"type":27,"tag":233,"props":1063,"children":1065},{"id":1064},"_41-上下文裁剪第一优先级",[1066],{"type":33,"value":1067},"4.1 上下文裁剪：第一优先级",{"type":27,"tag":35,"props":1069,"children":1070},{},[1071],{"type":33,"value":1072},"绝大多数 AI 成本优化，最先该做的是上下文治理。",{"type":27,"tag":824,"props":1074,"children":1076},{"id":1075},"常见做法",[1077],{"type":33,"value":1075},{"type":27,"tag":41,"props":1079,"children":1080},{},[1081,1086,1091,1096],{"type":27,"tag":45,"props":1082,"children":1083},{},[1084],{"type":33,"value":1085},"只保留最近 N 轮对话",{"type":27,"tag":45,"props":1087,"children":1088},{},[1089],{"type":33,"value":1090},"对历史对话做摘要压缩",{"type":27,"tag":45,"props":1092,"children":1093},{},[1094],{"type":33,"value":1095},"把系统提示拆成固定模板和场景变量",{"type":27,"tag":45,"props":1097,"children":1098},{},[1099],{"type":33,"value":1100},"删除与当前任务无关的信息",{"type":27,"tag":233,"props":1102,"children":1104},{"id":1103},"_42-一个简单的消息裁剪示例",[1105],{"type":33,"value":1106},"4.2 一个简单的消息裁剪示例",{"type":27,"tag":473,"props":1108,"children":1110},{"className":475,"code":1109,"language":477,"meta":7,"style":7},"type Message = { role: 'system' | 'user' | 'assistant'; content: string }\n\nexport function trimMessages(messages: Message[], maxChars = 8000) {\n  const reversed = [...messages].reverse()\n  const kept: Message[] = []\n  let total = 0\n\n  for (const msg of reversed) {\n    total += msg.content.length\n    if (total > maxChars) break\n    kept.push(msg)\n  }\n\n  return kept.reverse()\n}\n",[1111],{"type":27,"tag":480,"props":1112,"children":1113},{"__ignoreMap":7},[1114,1193,1200,1256,1298,1333,1355,1362,1395,1418,1446,1464,1472,1479,1499],{"type":27,"tag":484,"props":1115,"children":1116},{"class":486,"line":487},[1117,1121,1126,1130,1135,1140,1144,1150,1155,1160,1164,1169,1174,1179,1183,1188],{"type":27,"tag":484,"props":1118,"children":1119},{"style":491},[1120],{"type":33,"value":494},{"type":27,"tag":484,"props":1122,"children":1123},{"style":497},[1124],{"type":33,"value":1125}," Message",{"type":27,"tag":484,"props":1127,"children":1128},{"style":491},[1129],{"type":33,"value":505},{"type":27,"tag":484,"props":1131,"children":1132},{"style":508},[1133],{"type":33,"value":1134}," { ",{"type":27,"tag":484,"props":1136,"children":1137},{"style":517},[1138],{"type":33,"value":1139},"role",{"type":27,"tag":484,"props":1141,"children":1142},{"style":491},[1143],{"type":33,"value":525},{"type":27,"tag":484,"props":1145,"children":1147},{"style":1146},"--shiki-default:#9ECBFF",[1148],{"type":33,"value":1149}," 'system'",{"type":27,"tag":484,"props":1151,"children":1152},{"style":491},[1153],{"type":33,"value":1154}," |",{"type":27,"tag":484,"props":1156,"children":1157},{"style":1146},[1158],{"type":33,"value":1159}," 'user'",{"type":27,"tag":484,"props":1161,"children":1162},{"style":491},[1163],{"type":33,"value":1154},{"type":27,"tag":484,"props":1165,"children":1166},{"style":1146},[1167],{"type":33,"value":1168}," 'assistant'",{"type":27,"tag":484,"props":1170,"children":1171},{"style":508},[1172],{"type":33,"value":1173},"; ",{"type":27,"tag":484,"props":1175,"children":1176},{"style":517},[1177],{"type":33,"value":1178},"content",{"type":27,"tag":484,"props":1180,"children":1181},{"style":491},[1182],{"type":33,"value":525},{"type":27,"tag":484,"props":1184,"children":1185},{"style":528},[1186],{"type":33,"value":1187}," string",{"type":27,"tag":484,"props":1189,"children":1190},{"style":508},[1191],{"type":33,"value":1192}," }\n",{"type":27,"tag":484,"props":1194,"children":1195},{"class":486,"line":20},[1196],{"type":27,"tag":484,"props":1197,"children":1198},{"emptyLinePlaceholder":18},[1199],{"type":33,"value":566},{"type":27,"tag":484,"props":1201,"children":1202},{"class":486,"line":534},[1203,1207,1211,1216,1220,1225,1229,1233,1238,1243,1247,1252],{"type":27,"tag":484,"props":1204,"children":1205},{"style":491},[1206],{"type":33,"value":646},{"type":27,"tag":484,"props":1208,"children":1209},{"style":491},[1210],{"type":33,"value":651},{"type":27,"tag":484,"props":1212,"children":1213},{"style":497},[1214],{"type":33,"value":1215}," trimMessages",{"type":27,"tag":484,"props":1217,"children":1218},{"style":508},[1219],{"type":33,"value":661},{"type":27,"tag":484,"props":1221,"children":1222},{"style":517},[1223],{"type":33,"value":1224},"messages",{"type":27,"tag":484,"props":1226,"children":1227},{"style":491},[1228],{"type":33,"value":525},{"type":27,"tag":484,"props":1230,"children":1231},{"style":497},[1232],{"type":33,"value":1125},{"type":27,"tag":484,"props":1234,"children":1235},{"style":508},[1236],{"type":33,"value":1237},"[], ",{"type":27,"tag":484,"props":1239,"children":1240},{"style":517},[1241],{"type":33,"value":1242},"maxChars",{"type":27,"tag":484,"props":1244,"children":1245},{"style":491},[1246],{"type":33,"value":505},{"type":27,"tag":484,"props":1248,"children":1249},{"style":528},[1250],{"type":33,"value":1251}," 8000",{"type":27,"tag":484,"props":1253,"children":1254},{"style":508},[1255],{"type":33,"value":697},{"type":27,"tag":484,"props":1257,"children":1258},{"class":486,"line":551},[1259,1264,1269,1273,1278,1283,1288,1293],{"type":27,"tag":484,"props":1260,"children":1261},{"style":491},[1262],{"type":33,"value":1263},"  const",{"type":27,"tag":484,"props":1265,"children":1266},{"style":528},[1267],{"type":33,"value":1268}," reversed",{"type":27,"tag":484,"props":1270,"children":1271},{"style":491},[1272],{"type":33,"value":505},{"type":27,"tag":484,"props":1274,"children":1275},{"style":508},[1276],{"type":33,"value":1277}," [",{"type":27,"tag":484,"props":1279,"children":1280},{"style":491},[1281],{"type":33,"value":1282},"...",{"type":27,"tag":484,"props":1284,"children":1285},{"style":508},[1286],{"type":33,"value":1287},"messages].",{"type":27,"tag":484,"props":1289,"children":1290},{"style":497},[1291],{"type":33,"value":1292},"reverse",{"type":27,"tag":484,"props":1294,"children":1295},{"style":508},[1296],{"type":33,"value":1297},"()\n",{"type":27,"tag":484,"props":1299,"children":1300},{"class":486,"line":560},[1301,1305,1310,1314,1318,1323,1328],{"type":27,"tag":484,"props":1302,"children":1303},{"style":491},[1304],{"type":33,"value":1263},{"type":27,"tag":484,"props":1306,"children":1307},{"style":528},[1308],{"type":33,"value":1309}," kept",{"type":27,"tag":484,"props":1311,"children":1312},{"style":491},[1313],{"type":33,"value":525},{"type":27,"tag":484,"props":1315,"children":1316},{"style":497},[1317],{"type":33,"value":1125},{"type":27,"tag":484,"props":1319,"children":1320},{"style":508},[1321],{"type":33,"value":1322},"[] ",{"type":27,"tag":484,"props":1324,"children":1325},{"style":491},[1326],{"type":33,"value":1327},"=",{"type":27,"tag":484,"props":1329,"children":1330},{"style":508},[1331],{"type":33,"value":1332}," []\n",{"type":27,"tag":484,"props":1334,"children":1335},{"class":486,"line":569},[1336,1341,1346,1350],{"type":27,"tag":484,"props":1337,"children":1338},{"style":491},[1339],{"type":33,"value":1340},"  let",{"type":27,"tag":484,"props":1342,"children":1343},{"style":508},[1344],{"type":33,"value":1345}," total ",{"type":27,"tag":484,"props":1347,"children":1348},{"style":491},[1349],{"type":33,"value":1327},{"type":27,"tag":484,"props":1351,"children":1352},{"style":528},[1353],{"type":33,"value":1354}," 0\n",{"type":27,"tag":484,"props":1356,"children":1357},{"class":486,"line":590},[1358],{"type":27,"tag":484,"props":1359,"children":1360},{"emptyLinePlaceholder":18},[1361],{"type":33,"value":566},{"type":27,"tag":484,"props":1363,"children":1364},{"class":486,"line":607},[1365,1370,1375,1380,1385,1390],{"type":27,"tag":484,"props":1366,"children":1367},{"style":491},[1368],{"type":33,"value":1369},"  for",{"type":27,"tag":484,"props":1371,"children":1372},{"style":508},[1373],{"type":33,"value":1374}," (",{"type":27,"tag":484,"props":1376,"children":1377},{"style":491},[1378],{"type":33,"value":1379},"const",{"type":27,"tag":484,"props":1381,"children":1382},{"style":528},[1383],{"type":33,"value":1384}," msg",{"type":27,"tag":484,"props":1386,"children":1387},{"style":491},[1388],{"type":33,"value":1389}," of",{"type":27,"tag":484,"props":1391,"children":1392},{"style":508},[1393],{"type":33,"value":1394}," reversed) {\n",{"type":27,"tag":484,"props":1396,"children":1397},{"class":486,"line":624},[1398,1403,1408,1413],{"type":27,"tag":484,"props":1399,"children":1400},{"style":508},[1401],{"type":33,"value":1402},"    total ",{"type":27,"tag":484,"props":1404,"children":1405},{"style":491},[1406],{"type":33,"value":1407},"+=",{"type":27,"tag":484,"props":1409,"children":1410},{"style":508},[1411],{"type":33,"value":1412}," msg.content.",{"type":27,"tag":484,"props":1414,"children":1415},{"style":528},[1416],{"type":33,"value":1417},"length\n",{"type":27,"tag":484,"props":1419,"children":1420},{"class":486,"line":632},[1421,1426,1431,1436,1441],{"type":27,"tag":484,"props":1422,"children":1423},{"style":491},[1424],{"type":33,"value":1425},"    if",{"type":27,"tag":484,"props":1427,"children":1428},{"style":508},[1429],{"type":33,"value":1430}," (total ",{"type":27,"tag":484,"props":1432,"children":1433},{"style":491},[1434],{"type":33,"value":1435},">",{"type":27,"tag":484,"props":1437,"children":1438},{"style":508},[1439],{"type":33,"value":1440}," maxChars) ",{"type":27,"tag":484,"props":1442,"children":1443},{"style":491},[1444],{"type":33,"value":1445},"break\n",{"type":27,"tag":484,"props":1447,"children":1448},{"class":486,"line":640},[1449,1454,1459],{"type":27,"tag":484,"props":1450,"children":1451},{"style":508},[1452],{"type":33,"value":1453},"    kept.",{"type":27,"tag":484,"props":1455,"children":1456},{"style":497},[1457],{"type":33,"value":1458},"push",{"type":27,"tag":484,"props":1460,"children":1461},{"style":508},[1462],{"type":33,"value":1463},"(msg)\n",{"type":27,"tag":484,"props":1465,"children":1466},{"class":486,"line":700},[1467],{"type":27,"tag":484,"props":1468,"children":1469},{"style":508},[1470],{"type":33,"value":1471},"  }\n",{"type":27,"tag":484,"props":1473,"children":1474},{"class":486,"line":714},[1475],{"type":27,"tag":484,"props":1476,"children":1477},{"emptyLinePlaceholder":18},[1478],{"type":33,"value":566},{"type":27,"tag":484,"props":1480,"children":1481},{"class":486,"line":753},[1482,1486,1491,1495],{"type":27,"tag":484,"props":1483,"children":1484},{"style":491},[1485],{"type":33,"value":706},{"type":27,"tag":484,"props":1487,"children":1488},{"style":508},[1489],{"type":33,"value":1490}," kept.",{"type":27,"tag":484,"props":1492,"children":1493},{"style":497},[1494],{"type":33,"value":1292},{"type":27,"tag":484,"props":1496,"children":1497},{"style":508},[1498],{"type":33,"value":1297},{"type":27,"tag":484,"props":1500,"children":1501},{"class":486,"line":783},[1502],{"type":27,"tag":484,"props":1503,"children":1504},{"style":508},[1505],{"type":33,"value":557},{"type":27,"tag":35,"props":1507,"children":1508},{},[1509],{"type":33,"value":1510},"这当然不是严格 token 级裁剪，但能说明一个核心原则：",{"type":27,"tag":114,"props":1512,"children":1513},{},[1514],{"type":27,"tag":35,"props":1515,"children":1516},{},[1517],{"type":27,"tag":121,"props":1518,"children":1519},{},[1520],{"type":33,"value":1521},"上下文不是越多越好，而是越相关越好。",{"type":27,"tag":233,"props":1523,"children":1525},{"id":1524},"_43-输出约束第二优先级",[1526],{"type":33,"value":1527},"4.3 输出约束：第二优先级",{"type":27,"tag":35,"props":1529,"children":1530},{},[1531],{"type":33,"value":1532},"很多团队认真管输入，却不管输出。结果模型生成了大量业务并不需要的文字。",{"type":27,"tag":35,"props":1534,"children":1535},{},[1536],{"type":33,"value":1537},"更好的写法是明确限制：",{"type":27,"tag":41,"props":1539,"children":1540},{},[1541,1546,1551,1556],{"type":27,"tag":45,"props":1542,"children":1543},{},[1544],{"type":33,"value":1545},"最多 3 条要点",{"type":27,"tag":45,"props":1547,"children":1548},{},[1549],{"type":33,"value":1550},"每条不超过 40 字",{"type":27,"tag":45,"props":1552,"children":1553},{},[1554],{"type":33,"value":1555},"只输出 JSON",{"type":27,"tag":45,"props":1557,"children":1558},{},[1559],{"type":33,"value":1560},"不要解释推理过程",{"type":27,"tag":35,"props":1562,"children":1563},{},[1564],{"type":33,"value":1565},"输出越受控，越省钱，也越稳定。",{"type":27,"tag":171,"props":1567,"children":1568},{},[],{"type":27,"tag":175,"props":1570,"children":1572},{"id":1571},"五模型分层把贵模型用在最值钱的地方",[1573],{"type":33,"value":1574},"五、模型分层：把“贵模型”用在最值钱的地方",{"type":27,"tag":35,"props":1576,"children":1577},{},[1578],{"type":33,"value":1579},"这是非常实用的工程策略。",{"type":27,"tag":233,"props":1581,"children":1583},{"id":1582},"_51-不同任务配不同模型",[1584],{"type":33,"value":1585},"5.1 不同任务，配不同模型",{"type":27,"tag":35,"props":1587,"children":1588},{},[1589],{"type":33,"value":897},{"type":27,"tag":353,"props":1591,"children":1592},{},[1593,1609],{"type":27,"tag":357,"props":1594,"children":1595},{},[1596],{"type":27,"tag":361,"props":1597,"children":1598},{},[1599,1604],{"type":27,"tag":365,"props":1600,"children":1601},{},[1602],{"type":33,"value":1603},"任务",{"type":27,"tag":365,"props":1605,"children":1606},{},[1607],{"type":33,"value":1608},"推荐策略",{"type":27,"tag":376,"props":1610,"children":1611},{},[1612,1625,1638,1651],{"type":27,"tag":361,"props":1613,"children":1614},{},[1615,1620],{"type":27,"tag":383,"props":1616,"children":1617},{},[1618],{"type":33,"value":1619},"分类、抽取",{"type":27,"tag":383,"props":1621,"children":1622},{},[1623],{"type":33,"value":1624},"用轻量模型",{"type":27,"tag":361,"props":1626,"children":1627},{},[1628,1633],{"type":27,"tag":383,"props":1629,"children":1630},{},[1631],{"type":33,"value":1632},"标题生成、短摘要",{"type":27,"tag":383,"props":1634,"children":1635},{},[1636],{"type":33,"value":1637},"默认模型",{"type":27,"tag":361,"props":1639,"children":1640},{},[1641,1646],{"type":27,"tag":383,"props":1642,"children":1643},{},[1644],{"type":33,"value":1645},"高价值复杂问答",{"type":27,"tag":383,"props":1647,"children":1648},{},[1649],{"type":33,"value":1650},"强模型",{"type":27,"tag":361,"props":1652,"children":1653},{},[1654,1659],{"type":27,"tag":383,"props":1655,"children":1656},{},[1657],{"type":33,"value":1658},"fallback",{"type":27,"tag":383,"props":1660,"children":1661},{},[1662],{"type":33,"value":1663},"备用便宜模型或规则兜底",{"type":27,"tag":233,"props":1665,"children":1667},{"id":1666},"_52-为什么这比统一换便宜模型更稳",[1668],{"type":33,"value":1669},"5.2 为什么这比统一换便宜模型更稳",{"type":27,"tag":35,"props":1671,"children":1672},{},[1673],{"type":33,"value":1674},"因为用户真正感知的，不是“你整体用了哪个模型”，而是：",{"type":27,"tag":41,"props":1676,"children":1677},{},[1678,1683,1688],{"type":27,"tag":45,"props":1679,"children":1680},{},[1681],{"type":33,"value":1682},"关键任务质量够不够",{"type":27,"tag":45,"props":1684,"children":1685},{},[1686],{"type":33,"value":1687},"响应快不快",{"type":27,"tag":45,"props":1689,"children":1690},{},[1691],{"type":33,"value":1692},"错误多不多",{"type":27,"tag":35,"props":1694,"children":1695},{},[1696],{"type":33,"value":1697},"一刀切换便宜模型，可能省了账单，却把转化和留存打掉。",{"type":27,"tag":233,"props":1699,"children":1701},{"id":1700},"_53-一个路由示例",[1702],{"type":33,"value":1703},"5.3 一个路由示例",{"type":27,"tag":473,"props":1705,"children":1707},{"className":475,"code":1706,"language":477,"meta":7,"style":7},"export function chooseModel(task: 'classify' | 'summary' | 'qa') {\n  switch (task) {\n    case 'classify':\n      return 'small-fast-model'\n    case 'summary':\n      return 'balanced-model'\n    case 'qa':\n      return 'strong-reasoning-model'\n    default:\n      return 'balanced-model'\n  }\n}\n",[1708],{"type":27,"tag":480,"props":1709,"children":1710},{"__ignoreMap":7},[1711,1767,1780,1797,1810,1825,1837,1852,1864,1876,1887,1894],{"type":27,"tag":484,"props":1712,"children":1713},{"class":486,"line":487},[1714,1718,1722,1727,1731,1736,1740,1745,1749,1754,1758,1763],{"type":27,"tag":484,"props":1715,"children":1716},{"style":491},[1717],{"type":33,"value":646},{"type":27,"tag":484,"props":1719,"children":1720},{"style":491},[1721],{"type":33,"value":651},{"type":27,"tag":484,"props":1723,"children":1724},{"style":497},[1725],{"type":33,"value":1726}," chooseModel",{"type":27,"tag":484,"props":1728,"children":1729},{"style":508},[1730],{"type":33,"value":661},{"type":27,"tag":484,"props":1732,"children":1733},{"style":517},[1734],{"type":33,"value":1735},"task",{"type":27,"tag":484,"props":1737,"children":1738},{"style":491},[1739],{"type":33,"value":525},{"type":27,"tag":484,"props":1741,"children":1742},{"style":1146},[1743],{"type":33,"value":1744}," 'classify'",{"type":27,"tag":484,"props":1746,"children":1747},{"style":491},[1748],{"type":33,"value":1154},{"type":27,"tag":484,"props":1750,"children":1751},{"style":1146},[1752],{"type":33,"value":1753}," 'summary'",{"type":27,"tag":484,"props":1755,"children":1756},{"style":491},[1757],{"type":33,"value":1154},{"type":27,"tag":484,"props":1759,"children":1760},{"style":1146},[1761],{"type":33,"value":1762}," 'qa'",{"type":27,"tag":484,"props":1764,"children":1765},{"style":508},[1766],{"type":33,"value":697},{"type":27,"tag":484,"props":1768,"children":1769},{"class":486,"line":20},[1770,1775],{"type":27,"tag":484,"props":1771,"children":1772},{"style":491},[1773],{"type":33,"value":1774},"  switch",{"type":27,"tag":484,"props":1776,"children":1777},{"style":508},[1778],{"type":33,"value":1779}," (task) {\n",{"type":27,"tag":484,"props":1781,"children":1782},{"class":486,"line":534},[1783,1788,1792],{"type":27,"tag":484,"props":1784,"children":1785},{"style":491},[1786],{"type":33,"value":1787},"    case",{"type":27,"tag":484,"props":1789,"children":1790},{"style":1146},[1791],{"type":33,"value":1744},{"type":27,"tag":484,"props":1793,"children":1794},{"style":508},[1795],{"type":33,"value":1796},":\n",{"type":27,"tag":484,"props":1798,"children":1799},{"class":486,"line":551},[1800,1805],{"type":27,"tag":484,"props":1801,"children":1802},{"style":491},[1803],{"type":33,"value":1804},"      return",{"type":27,"tag":484,"props":1806,"children":1807},{"style":1146},[1808],{"type":33,"value":1809}," 'small-fast-model'\n",{"type":27,"tag":484,"props":1811,"children":1812},{"class":486,"line":560},[1813,1817,1821],{"type":27,"tag":484,"props":1814,"children":1815},{"style":491},[1816],{"type":33,"value":1787},{"type":27,"tag":484,"props":1818,"children":1819},{"style":1146},[1820],{"type":33,"value":1753},{"type":27,"tag":484,"props":1822,"children":1823},{"style":508},[1824],{"type":33,"value":1796},{"type":27,"tag":484,"props":1826,"children":1827},{"class":486,"line":569},[1828,1832],{"type":27,"tag":484,"props":1829,"children":1830},{"style":491},[1831],{"type":33,"value":1804},{"type":27,"tag":484,"props":1833,"children":1834},{"style":1146},[1835],{"type":33,"value":1836}," 'balanced-model'\n",{"type":27,"tag":484,"props":1838,"children":1839},{"class":486,"line":590},[1840,1844,1848],{"type":27,"tag":484,"props":1841,"children":1842},{"style":491},[1843],{"type":33,"value":1787},{"type":27,"tag":484,"props":1845,"children":1846},{"style":1146},[1847],{"type":33,"value":1762},{"type":27,"tag":484,"props":1849,"children":1850},{"style":508},[1851],{"type":33,"value":1796},{"type":27,"tag":484,"props":1853,"children":1854},{"class":486,"line":607},[1855,1859],{"type":27,"tag":484,"props":1856,"children":1857},{"style":491},[1858],{"type":33,"value":1804},{"type":27,"tag":484,"props":1860,"children":1861},{"style":1146},[1862],{"type":33,"value":1863}," 'strong-reasoning-model'\n",{"type":27,"tag":484,"props":1865,"children":1866},{"class":486,"line":624},[1867,1872],{"type":27,"tag":484,"props":1868,"children":1869},{"style":491},[1870],{"type":33,"value":1871},"    default",{"type":27,"tag":484,"props":1873,"children":1874},{"style":508},[1875],{"type":33,"value":1796},{"type":27,"tag":484,"props":1877,"children":1878},{"class":486,"line":632},[1879,1883],{"type":27,"tag":484,"props":1880,"children":1881},{"style":491},[1882],{"type":33,"value":1804},{"type":27,"tag":484,"props":1884,"children":1885},{"style":1146},[1886],{"type":33,"value":1836},{"type":27,"tag":484,"props":1888,"children":1889},{"class":486,"line":640},[1890],{"type":27,"tag":484,"props":1891,"children":1892},{"style":508},[1893],{"type":33,"value":1471},{"type":27,"tag":484,"props":1895,"children":1896},{"class":486,"line":700},[1897],{"type":27,"tag":484,"props":1898,"children":1899},{"style":508},[1900],{"type":33,"value":557},{"type":27,"tag":35,"props":1902,"children":1903},{},[1904,1906,1911],{"type":33,"value":1905},"真正成熟的系统，做的是",{"type":27,"tag":121,"props":1907,"children":1908},{},[1909],{"type":33,"value":1910},"任务路由",{"type":33,"value":1912},"，不是“全站统一一个模型名”。",{"type":27,"tag":171,"props":1914,"children":1915},{},[],{"type":27,"tag":175,"props":1917,"children":1919},{"id":1918},"六缓存复用不是所有请求都值得重新生成",[1920],{"type":33,"value":1921},"六、缓存复用：不是所有请求都值得重新生成",{"type":27,"tag":35,"props":1923,"children":1924},{},[1925],{"type":33,"value":1926},"很多 AI 场景有天然复用价值，但团队经常忽略。",{"type":27,"tag":233,"props":1928,"children":1930},{"id":1929},"_61-适合缓存的-ai-结果",[1931],{"type":33,"value":1932},"6.1 适合缓存的 AI 结果",{"type":27,"tag":41,"props":1934,"children":1935},{},[1936,1941,1946,1951,1956],{"type":27,"tag":45,"props":1937,"children":1938},{},[1939],{"type":33,"value":1940},"热门文章摘要",{"type":27,"tag":45,"props":1942,"children":1943},{},[1944],{"type":33,"value":1945},"常见 FAQ 问答",{"type":27,"tag":45,"props":1947,"children":1948},{},[1949],{"type":33,"value":1950},"商品卖点提炼",{"type":27,"tag":45,"props":1952,"children":1953},{},[1954],{"type":33,"value":1955},"标签生成结果",{"type":27,"tag":45,"props":1957,"children":1958},{},[1959],{"type":33,"value":1960},"内容审核初筛结果",{"type":27,"tag":233,"props":1962,"children":1964},{"id":1963},"_62-不适合直接缓存的场景",[1965],{"type":33,"value":1966},"6.2 不适合直接缓存的场景",{"type":27,"tag":41,"props":1968,"children":1969},{},[1970,1975,1980],{"type":27,"tag":45,"props":1971,"children":1972},{},[1973],{"type":33,"value":1974},"强个性化对话",{"type":27,"tag":45,"props":1976,"children":1977},{},[1978],{"type":33,"value":1979},"高时效信息",{"type":27,"tag":45,"props":1981,"children":1982},{},[1983],{"type":33,"value":1984},"依赖实时上下文的回答",{"type":27,"tag":233,"props":1986,"children":1988},{"id":1987},"_63-一个简单的缓存思路",[1989],{"type":33,"value":1990},"6.3 一个简单的缓存思路",{"type":27,"tag":473,"props":1992,"children":1994},{"className":475,"code":1993,"language":477,"meta":7,"style":7},"import crypto from 'crypto'\n\nexport function makePromptCacheKey(task: string, input: string, version: string) {\n  const hash = crypto\n    .createHash('sha256')\n    .update(`${task}:${version}:${input}`)\n    .digest('hex')\n\n  return `llm:${task}:${version}:${hash}`\n}\n",[1995],{"type":27,"tag":480,"props":1996,"children":1997},{"__ignoreMap":7},[1998,2021,2028,2098,2119,2146,2197,2222,2229,2267],{"type":27,"tag":484,"props":1999,"children":2000},{"class":486,"line":487},[2001,2006,2011,2016],{"type":27,"tag":484,"props":2002,"children":2003},{"style":491},[2004],{"type":33,"value":2005},"import",{"type":27,"tag":484,"props":2007,"children":2008},{"style":508},[2009],{"type":33,"value":2010}," crypto ",{"type":27,"tag":484,"props":2012,"children":2013},{"style":491},[2014],{"type":33,"value":2015},"from",{"type":27,"tag":484,"props":2017,"children":2018},{"style":1146},[2019],{"type":33,"value":2020}," 'crypto'\n",{"type":27,"tag":484,"props":2022,"children":2023},{"class":486,"line":20},[2024],{"type":27,"tag":484,"props":2025,"children":2026},{"emptyLinePlaceholder":18},[2027],{"type":33,"value":566},{"type":27,"tag":484,"props":2029,"children":2030},{"class":486,"line":534},[2031,2035,2039,2044,2048,2052,2056,2060,2064,2069,2073,2077,2081,2086,2090,2094],{"type":27,"tag":484,"props":2032,"children":2033},{"style":491},[2034],{"type":33,"value":646},{"type":27,"tag":484,"props":2036,"children":2037},{"style":491},[2038],{"type":33,"value":651},{"type":27,"tag":484,"props":2040,"children":2041},{"style":497},[2042],{"type":33,"value":2043}," makePromptCacheKey",{"type":27,"tag":484,"props":2045,"children":2046},{"style":508},[2047],{"type":33,"value":661},{"type":27,"tag":484,"props":2049,"children":2050},{"style":517},[2051],{"type":33,"value":1735},{"type":27,"tag":484,"props":2053,"children":2054},{"style":491},[2055],{"type":33,"value":525},{"type":27,"tag":484,"props":2057,"children":2058},{"style":528},[2059],{"type":33,"value":1187},{"type":27,"tag":484,"props":2061,"children":2062},{"style":508},[2063],{"type":33,"value":679},{"type":27,"tag":484,"props":2065,"children":2066},{"style":517},[2067],{"type":33,"value":2068},"input",{"type":27,"tag":484,"props":2070,"children":2071},{"style":491},[2072],{"type":33,"value":525},{"type":27,"tag":484,"props":2074,"children":2075},{"style":528},[2076],{"type":33,"value":1187},{"type":27,"tag":484,"props":2078,"children":2079},{"style":508},[2080],{"type":33,"value":679},{"type":27,"tag":484,"props":2082,"children":2083},{"style":517},[2084],{"type":33,"value":2085},"version",{"type":27,"tag":484,"props":2087,"children":2088},{"style":491},[2089],{"type":33,"value":525},{"type":27,"tag":484,"props":2091,"children":2092},{"style":528},[2093],{"type":33,"value":1187},{"type":27,"tag":484,"props":2095,"children":2096},{"style":508},[2097],{"type":33,"value":697},{"type":27,"tag":484,"props":2099,"children":2100},{"class":486,"line":551},[2101,2105,2110,2114],{"type":27,"tag":484,"props":2102,"children":2103},{"style":491},[2104],{"type":33,"value":1263},{"type":27,"tag":484,"props":2106,"children":2107},{"style":528},[2108],{"type":33,"value":2109}," hash",{"type":27,"tag":484,"props":2111,"children":2112},{"style":491},[2113],{"type":33,"value":505},{"type":27,"tag":484,"props":2115,"children":2116},{"style":508},[2117],{"type":33,"value":2118}," crypto\n",{"type":27,"tag":484,"props":2120,"children":2121},{"class":486,"line":560},[2122,2127,2132,2136,2141],{"type":27,"tag":484,"props":2123,"children":2124},{"style":508},[2125],{"type":33,"value":2126},"    .",{"type":27,"tag":484,"props":2128,"children":2129},{"style":497},[2130],{"type":33,"value":2131},"createHash",{"type":27,"tag":484,"props":2133,"children":2134},{"style":508},[2135],{"type":33,"value":661},{"type":27,"tag":484,"props":2137,"children":2138},{"style":1146},[2139],{"type":33,"value":2140},"'sha256'",{"type":27,"tag":484,"props":2142,"children":2143},{"style":508},[2144],{"type":33,"value":2145},")\n",{"type":27,"tag":484,"props":2147,"children":2148},{"class":486,"line":569},[2149,2153,2158,2162,2167,2171,2176,2180,2184,2188,2193],{"type":27,"tag":484,"props":2150,"children":2151},{"style":508},[2152],{"type":33,"value":2126},{"type":27,"tag":484,"props":2154,"children":2155},{"style":497},[2156],{"type":33,"value":2157},"update",{"type":27,"tag":484,"props":2159,"children":2160},{"style":508},[2161],{"type":33,"value":661},{"type":27,"tag":484,"props":2163,"children":2164},{"style":1146},[2165],{"type":33,"value":2166},"`${",{"type":27,"tag":484,"props":2168,"children":2169},{"style":508},[2170],{"type":33,"value":1735},{"type":27,"tag":484,"props":2172,"children":2173},{"style":1146},[2174],{"type":33,"value":2175},"}:${",{"type":27,"tag":484,"props":2177,"children":2178},{"style":508},[2179],{"type":33,"value":2085},{"type":27,"tag":484,"props":2181,"children":2182},{"style":1146},[2183],{"type":33,"value":2175},{"type":27,"tag":484,"props":2185,"children":2186},{"style":508},[2187],{"type":33,"value":2068},{"type":27,"tag":484,"props":2189,"children":2190},{"style":1146},[2191],{"type":33,"value":2192},"}`",{"type":27,"tag":484,"props":2194,"children":2195},{"style":508},[2196],{"type":33,"value":2145},{"type":27,"tag":484,"props":2198,"children":2199},{"class":486,"line":590},[2200,2204,2209,2213,2218],{"type":27,"tag":484,"props":2201,"children":2202},{"style":508},[2203],{"type":33,"value":2126},{"type":27,"tag":484,"props":2205,"children":2206},{"style":497},[2207],{"type":33,"value":2208},"digest",{"type":27,"tag":484,"props":2210,"children":2211},{"style":508},[2212],{"type":33,"value":661},{"type":27,"tag":484,"props":2214,"children":2215},{"style":1146},[2216],{"type":33,"value":2217},"'hex'",{"type":27,"tag":484,"props":2219,"children":2220},{"style":508},[2221],{"type":33,"value":2145},{"type":27,"tag":484,"props":2223,"children":2224},{"class":486,"line":607},[2225],{"type":27,"tag":484,"props":2226,"children":2227},{"emptyLinePlaceholder":18},[2228],{"type":33,"value":566},{"type":27,"tag":484,"props":2230,"children":2231},{"class":486,"line":624},[2232,2236,2241,2245,2249,2253,2257,2262],{"type":27,"tag":484,"props":2233,"children":2234},{"style":491},[2235],{"type":33,"value":706},{"type":27,"tag":484,"props":2237,"children":2238},{"style":1146},[2239],{"type":33,"value":2240}," `llm:${",{"type":27,"tag":484,"props":2242,"children":2243},{"style":508},[2244],{"type":33,"value":1735},{"type":27,"tag":484,"props":2246,"children":2247},{"style":1146},[2248],{"type":33,"value":2175},{"type":27,"tag":484,"props":2250,"children":2251},{"style":508},[2252],{"type":33,"value":2085},{"type":27,"tag":484,"props":2254,"children":2255},{"style":1146},[2256],{"type":33,"value":2175},{"type":27,"tag":484,"props":2258,"children":2259},{"style":508},[2260],{"type":33,"value":2261},"hash",{"type":27,"tag":484,"props":2263,"children":2264},{"style":1146},[2265],{"type":33,"value":2266},"}`\n",{"type":27,"tag":484,"props":2268,"children":2269},{"class":486,"line":632},[2270],{"type":27,"tag":484,"props":2271,"children":2272},{"style":508},[2273],{"type":33,"value":557},{"type":27,"tag":35,"props":2275,"children":2276},{},[2277],{"type":33,"value":2278},"这里的关键点是把：",{"type":27,"tag":41,"props":2280,"children":2281},{},[2282,2287,2292],{"type":27,"tag":45,"props":2283,"children":2284},{},[2285],{"type":33,"value":2286},"任务类型",{"type":27,"tag":45,"props":2288,"children":2289},{},[2290],{"type":33,"value":2291},"prompt 版本",{"type":27,"tag":45,"props":2293,"children":2294},{},[2295],{"type":33,"value":2296},"输入内容",{"type":27,"tag":35,"props":2298,"children":2299},{},[2300],{"type":33,"value":2301},"都纳入 key。否则 prompt 改了，旧结果还会继续污染新逻辑。",{"type":27,"tag":171,"props":2303,"children":2304},{},[],{"type":27,"tag":175,"props":2306,"children":2308},{"id":2307},"七限制重试和重复调用很多钱不是花在回答而是花在重新回答",[2309],{"type":33,"value":2310},"七、限制重试和重复调用：很多钱不是花在“回答”，而是花在“重新回答”",{"type":27,"tag":233,"props":2312,"children":2314},{"id":2313},"_71-常见重复调用来源",[2315],{"type":33,"value":2316},"7.1 常见重复调用来源",{"type":27,"tag":41,"props":2318,"children":2319},{},[2320,2325,2330,2335],{"type":27,"tag":45,"props":2321,"children":2322},{},[2323],{"type":33,"value":2324},"前端按钮重复点击",{"type":27,"tag":45,"props":2326,"children":2327},{},[2328],{"type":33,"value":2329},"SSE 中断后直接重放整次请求",{"type":27,"tag":45,"props":2331,"children":2332},{},[2333],{"type":33,"value":2334},"后端超时阈值太短，导致误判失败再重试",{"type":27,"tag":45,"props":2336,"children":2337},{},[2338],{"type":33,"value":2339},"一个页面多个模块对同一文本各调一次模型",{"type":27,"tag":233,"props":2341,"children":2343},{"id":2342},"_72-两个实用手段",[2344],{"type":33,"value":2345},"7.2 两个实用手段",{"type":27,"tag":824,"props":2347,"children":2349},{"id":2348},"a-请求去重",[2350],{"type":33,"value":2351},"A. 请求去重",{"type":27,"tag":35,"props":2353,"children":2354},{},[2355],{"type":33,"value":2356},"同一用户、同一输入、短时间内相同请求，可以复用结果或合并中间态。",{"type":27,"tag":824,"props":2358,"children":2360},{"id":2359},"b-幂等-key",[2361],{"type":33,"value":2362},"B. 幂等 key",{"type":27,"tag":35,"props":2364,"children":2365},{},[2366],{"type":33,"value":2367},"对关键调用增加幂等标识，避免链路重复提交。",{"type":27,"tag":473,"props":2369,"children":2371},{"className":475,"code":2370,"language":477,"meta":7,"style":7},"export function makeIdempotencyKey(userId: string, action: string, payload: string) {\n  return `${userId}:${action}:${payload}`\n}\n",[2372],{"type":27,"tag":480,"props":2373,"children":2374},{"__ignoreMap":7},[2375,2446,2482],{"type":27,"tag":484,"props":2376,"children":2377},{"class":486,"line":487},[2378,2382,2386,2391,2395,2400,2404,2408,2412,2417,2421,2425,2429,2434,2438,2442],{"type":27,"tag":484,"props":2379,"children":2380},{"style":491},[2381],{"type":33,"value":646},{"type":27,"tag":484,"props":2383,"children":2384},{"style":491},[2385],{"type":33,"value":651},{"type":27,"tag":484,"props":2387,"children":2388},{"style":497},[2389],{"type":33,"value":2390}," makeIdempotencyKey",{"type":27,"tag":484,"props":2392,"children":2393},{"style":508},[2394],{"type":33,"value":661},{"type":27,"tag":484,"props":2396,"children":2397},{"style":517},[2398],{"type":33,"value":2399},"userId",{"type":27,"tag":484,"props":2401,"children":2402},{"style":491},[2403],{"type":33,"value":525},{"type":27,"tag":484,"props":2405,"children":2406},{"style":528},[2407],{"type":33,"value":1187},{"type":27,"tag":484,"props":2409,"children":2410},{"style":508},[2411],{"type":33,"value":679},{"type":27,"tag":484,"props":2413,"children":2414},{"style":517},[2415],{"type":33,"value":2416},"action",{"type":27,"tag":484,"props":2418,"children":2419},{"style":491},[2420],{"type":33,"value":525},{"type":27,"tag":484,"props":2422,"children":2423},{"style":528},[2424],{"type":33,"value":1187},{"type":27,"tag":484,"props":2426,"children":2427},{"style":508},[2428],{"type":33,"value":679},{"type":27,"tag":484,"props":2430,"children":2431},{"style":517},[2432],{"type":33,"value":2433},"payload",{"type":27,"tag":484,"props":2435,"children":2436},{"style":491},[2437],{"type":33,"value":525},{"type":27,"tag":484,"props":2439,"children":2440},{"style":528},[2441],{"type":33,"value":1187},{"type":27,"tag":484,"props":2443,"children":2444},{"style":508},[2445],{"type":33,"value":697},{"type":27,"tag":484,"props":2447,"children":2448},{"class":486,"line":20},[2449,2453,2458,2462,2466,2470,2474,2478],{"type":27,"tag":484,"props":2450,"children":2451},{"style":491},[2452],{"type":33,"value":706},{"type":27,"tag":484,"props":2454,"children":2455},{"style":1146},[2456],{"type":33,"value":2457}," `${",{"type":27,"tag":484,"props":2459,"children":2460},{"style":508},[2461],{"type":33,"value":2399},{"type":27,"tag":484,"props":2463,"children":2464},{"style":1146},[2465],{"type":33,"value":2175},{"type":27,"tag":484,"props":2467,"children":2468},{"style":508},[2469],{"type":33,"value":2416},{"type":27,"tag":484,"props":2471,"children":2472},{"style":1146},[2473],{"type":33,"value":2175},{"type":27,"tag":484,"props":2475,"children":2476},{"style":508},[2477],{"type":33,"value":2433},{"type":27,"tag":484,"props":2479,"children":2480},{"style":1146},[2481],{"type":33,"value":2266},{"type":27,"tag":484,"props":2483,"children":2484},{"class":486,"line":534},[2485],{"type":27,"tag":484,"props":2486,"children":2487},{"style":508},[2488],{"type":33,"value":557},{"type":27,"tag":35,"props":2490,"children":2491},{},[2492],{"type":33,"value":2493},"这不是 AI 特有问题，但在 AI 场景里，因为调用成本更高，所以收益特别明显。",{"type":27,"tag":171,"props":2495,"children":2496},{},[],{"type":27,"tag":175,"props":2498,"children":2500},{"id":2499},"八预算与告警不要等月底才知道超支",[2501],{"type":33,"value":2502},"八、预算与告警：不要等月底才知道超支",{"type":27,"tag":233,"props":2504,"children":2506},{"id":2505},"_81-至少做三层预算",[2507],{"type":33,"value":2508},"8.1 至少做三层预算",{"type":27,"tag":353,"props":2510,"children":2511},{},[2512,2528],{"type":27,"tag":357,"props":2513,"children":2514},{},[2515],{"type":27,"tag":361,"props":2516,"children":2517},{},[2518,2523],{"type":27,"tag":365,"props":2519,"children":2520},{},[2521],{"type":33,"value":2522},"层级",{"type":27,"tag":365,"props":2524,"children":2525},{},[2526],{"type":33,"value":2527},"作用",{"type":27,"tag":376,"props":2529,"children":2530},{},[2531,2544,2557],{"type":27,"tag":361,"props":2532,"children":2533},{},[2534,2539],{"type":27,"tag":383,"props":2535,"children":2536},{},[2537],{"type":33,"value":2538},"日预算",{"type":27,"tag":383,"props":2540,"children":2541},{},[2542],{"type":33,"value":2543},"及时发现异常放量",{"type":27,"tag":361,"props":2545,"children":2546},{},[2547,2552],{"type":27,"tag":383,"props":2548,"children":2549},{},[2550],{"type":33,"value":2551},"功能预算",{"type":27,"tag":383,"props":2553,"children":2554},{},[2555],{"type":33,"value":2556},"看哪个模块在失控",{"type":27,"tag":361,"props":2558,"children":2559},{},[2560,2565],{"type":27,"tag":383,"props":2561,"children":2562},{},[2563],{"type":33,"value":2564},"用户 / 租户预算",{"type":27,"tag":383,"props":2566,"children":2567},{},[2568],{"type":33,"value":2569},"控制大客户或异常行为",{"type":27,"tag":233,"props":2571,"children":2573},{"id":2572},"_82-一个实用告警方式",[2574],{"type":33,"value":2575},"8.2 一个实用告警方式",{"type":27,"tag":41,"props":2577,"children":2578},{},[2579,2584,2589],{"type":27,"tag":45,"props":2580,"children":2581},{},[2582],{"type":33,"value":2583},"当日成本达到预算的 70%：提醒",{"type":27,"tag":45,"props":2585,"children":2586},{},[2587],{"type":33,"value":2588},"达到 90%：通知 owner",{"type":27,"tag":45,"props":2590,"children":2591},{},[2592],{"type":33,"value":2593},"达到 100%：触发限流、切模型或关闭非核心功能",{"type":27,"tag":233,"props":2595,"children":2597},{"id":2596},"_83-为什么预算不是财务动作而是系统保护动作",[2598],{"type":33,"value":2599},"8.3 为什么预算不是财务动作，而是系统保护动作",{"type":27,"tag":35,"props":2601,"children":2602},{},[2603],{"type":33,"value":2604},"因为当成本失控时，背后往往对应：",{"type":27,"tag":41,"props":2606,"children":2607},{},[2608,2613,2618,2623],{"type":27,"tag":45,"props":2609,"children":2610},{},[2611],{"type":33,"value":2612},"异常重试",{"type":27,"tag":45,"props":2614,"children":2615},{},[2616],{"type":33,"value":2617},"用户滥用",{"type":27,"tag":45,"props":2619,"children":2620},{},[2621],{"type":33,"value":2622},"prompt 膨胀",{"type":27,"tag":45,"props":2624,"children":2625},{},[2626],{"type":33,"value":2627},"某次发布带来了额外调用",{"type":27,"tag":35,"props":2629,"children":2630},{},[2631],{"type":33,"value":2632},"预算报警，很多时候就是异常检测。",{"type":27,"tag":171,"props":2634,"children":2635},{},[],{"type":27,"tag":175,"props":2637,"children":2639},{"id":2638},"九怎样做成本归因不然你永远不知道该先优化哪",[2640],{"type":33,"value":2641},"九、怎样做成本归因：不然你永远不知道该先优化哪",{"type":27,"tag":35,"props":2643,"children":2644},{},[2645],{"type":33,"value":2646},"很多团队知道“本月花了很多”，但不知道：",{"type":27,"tag":41,"props":2648,"children":2649},{},[2650,2655,2660,2665],{"type":27,"tag":45,"props":2651,"children":2652},{},[2653],{"type":33,"value":2654},"花在哪个产品功能上",{"type":27,"tag":45,"props":2656,"children":2657},{},[2658],{"type":33,"value":2659},"哪个模型贡献最多",{"type":27,"tag":45,"props":2661,"children":2662},{},[2663],{"type":33,"value":2664},"哪个租户最贵",{"type":27,"tag":45,"props":2666,"children":2667},{},[2668],{"type":33,"value":2669},"是输入贵还是输出贵",{"type":27,"tag":233,"props":2671,"children":2673},{"id":2672},"_91-建议最少记录这些维度",[2674],{"type":33,"value":2675},"9.1 建议最少记录这些维度",{"type":27,"tag":41,"props":2677,"children":2678},{},[2679,2688,2697,2706,2715,2732,2741,2750],{"type":27,"tag":45,"props":2680,"children":2681},{},[2682],{"type":27,"tag":480,"props":2683,"children":2685},{"className":2684},[],[2686],{"type":33,"value":2687},"featureName",{"type":27,"tag":45,"props":2689,"children":2690},{},[2691],{"type":27,"tag":480,"props":2692,"children":2694},{"className":2693},[],[2695],{"type":33,"value":2696},"taskType",{"type":27,"tag":45,"props":2698,"children":2699},{},[2700],{"type":27,"tag":480,"props":2701,"children":2703},{"className":2702},[],[2704],{"type":33,"value":2705},"model",{"type":27,"tag":45,"props":2707,"children":2708},{},[2709],{"type":27,"tag":480,"props":2710,"children":2712},{"className":2711},[],[2713],{"type":33,"value":2714},"promptVersion",{"type":27,"tag":45,"props":2716,"children":2717},{},[2718,2724,2726],{"type":27,"tag":480,"props":2719,"children":2721},{"className":2720},[],[2722],{"type":33,"value":2723},"tenantId",{"type":33,"value":2725}," / ",{"type":27,"tag":480,"props":2727,"children":2729},{"className":2728},[],[2730],{"type":33,"value":2731},"userSegment",{"type":27,"tag":45,"props":2733,"children":2734},{},[2735],{"type":27,"tag":480,"props":2736,"children":2738},{"className":2737},[],[2739],{"type":33,"value":2740},"promptTokens",{"type":27,"tag":45,"props":2742,"children":2743},{},[2744],{"type":27,"tag":480,"props":2745,"children":2747},{"className":2746},[],[2748],{"type":33,"value":2749},"completionTokens",{"type":27,"tag":45,"props":2751,"children":2752},{},[2753],{"type":27,"tag":480,"props":2754,"children":2756},{"className":2755},[],[2757],{"type":33,"value":2758},"estimatedCost",{"type":27,"tag":233,"props":2760,"children":2762},{"id":2761},"_92-一个记录示例",[2763],{"type":33,"value":2764},"9.2 一个记录示例",{"type":27,"tag":473,"props":2766,"children":2768},{"className":475,"code":2767,"language":477,"meta":7,"style":7},"logger.info('llm.usage', {\n  featureName: 'ticket_summary',\n  taskType: 'summary',\n  model: 'balanced-model',\n  promptVersion: 'v3',\n  tenantId: 'tenant_42',\n  promptTokens: 820,\n  completionTokens: 140,\n  estimatedCost: 0.0042,\n})\n",[2769],{"type":27,"tag":480,"props":2770,"children":2771},{"__ignoreMap":7},[2772,2799,2817,2834,2851,2868,2885,2902,2919,2936],{"type":27,"tag":484,"props":2773,"children":2774},{"class":486,"line":487},[2775,2780,2785,2789,2794],{"type":27,"tag":484,"props":2776,"children":2777},{"style":508},[2778],{"type":33,"value":2779},"logger.",{"type":27,"tag":484,"props":2781,"children":2782},{"style":497},[2783],{"type":33,"value":2784},"info",{"type":27,"tag":484,"props":2786,"children":2787},{"style":508},[2788],{"type":33,"value":661},{"type":27,"tag":484,"props":2790,"children":2791},{"style":1146},[2792],{"type":33,"value":2793},"'llm.usage'",{"type":27,"tag":484,"props":2795,"children":2796},{"style":508},[2797],{"type":33,"value":2798},", {\n",{"type":27,"tag":484,"props":2800,"children":2801},{"class":486,"line":20},[2802,2807,2812],{"type":27,"tag":484,"props":2803,"children":2804},{"style":508},[2805],{"type":33,"value":2806},"  featureName: ",{"type":27,"tag":484,"props":2808,"children":2809},{"style":1146},[2810],{"type":33,"value":2811},"'ticket_summary'",{"type":27,"tag":484,"props":2813,"children":2814},{"style":508},[2815],{"type":33,"value":2816},",\n",{"type":27,"tag":484,"props":2818,"children":2819},{"class":486,"line":534},[2820,2825,2830],{"type":27,"tag":484,"props":2821,"children":2822},{"style":508},[2823],{"type":33,"value":2824},"  taskType: ",{"type":27,"tag":484,"props":2826,"children":2827},{"style":1146},[2828],{"type":33,"value":2829},"'summary'",{"type":27,"tag":484,"props":2831,"children":2832},{"style":508},[2833],{"type":33,"value":2816},{"type":27,"tag":484,"props":2835,"children":2836},{"class":486,"line":551},[2837,2842,2847],{"type":27,"tag":484,"props":2838,"children":2839},{"style":508},[2840],{"type":33,"value":2841},"  model: ",{"type":27,"tag":484,"props":2843,"children":2844},{"style":1146},[2845],{"type":33,"value":2846},"'balanced-model'",{"type":27,"tag":484,"props":2848,"children":2849},{"style":508},[2850],{"type":33,"value":2816},{"type":27,"tag":484,"props":2852,"children":2853},{"class":486,"line":560},[2854,2859,2864],{"type":27,"tag":484,"props":2855,"children":2856},{"style":508},[2857],{"type":33,"value":2858},"  promptVersion: ",{"type":27,"tag":484,"props":2860,"children":2861},{"style":1146},[2862],{"type":33,"value":2863},"'v3'",{"type":27,"tag":484,"props":2865,"children":2866},{"style":508},[2867],{"type":33,"value":2816},{"type":27,"tag":484,"props":2869,"children":2870},{"class":486,"line":569},[2871,2876,2881],{"type":27,"tag":484,"props":2872,"children":2873},{"style":508},[2874],{"type":33,"value":2875},"  tenantId: ",{"type":27,"tag":484,"props":2877,"children":2878},{"style":1146},[2879],{"type":33,"value":2880},"'tenant_42'",{"type":27,"tag":484,"props":2882,"children":2883},{"style":508},[2884],{"type":33,"value":2816},{"type":27,"tag":484,"props":2886,"children":2887},{"class":486,"line":590},[2888,2893,2898],{"type":27,"tag":484,"props":2889,"children":2890},{"style":508},[2891],{"type":33,"value":2892},"  promptTokens: ",{"type":27,"tag":484,"props":2894,"children":2895},{"style":528},[2896],{"type":33,"value":2897},"820",{"type":27,"tag":484,"props":2899,"children":2900},{"style":508},[2901],{"type":33,"value":2816},{"type":27,"tag":484,"props":2903,"children":2904},{"class":486,"line":607},[2905,2910,2915],{"type":27,"tag":484,"props":2906,"children":2907},{"style":508},[2908],{"type":33,"value":2909},"  completionTokens: ",{"type":27,"tag":484,"props":2911,"children":2912},{"style":528},[2913],{"type":33,"value":2914},"140",{"type":27,"tag":484,"props":2916,"children":2917},{"style":508},[2918],{"type":33,"value":2816},{"type":27,"tag":484,"props":2920,"children":2921},{"class":486,"line":624},[2922,2927,2932],{"type":27,"tag":484,"props":2923,"children":2924},{"style":508},[2925],{"type":33,"value":2926},"  estimatedCost: ",{"type":27,"tag":484,"props":2928,"children":2929},{"style":528},[2930],{"type":33,"value":2931},"0.0042",{"type":27,"tag":484,"props":2933,"children":2934},{"style":508},[2935],{"type":33,"value":2816},{"type":27,"tag":484,"props":2937,"children":2938},{"class":486,"line":632},[2939],{"type":27,"tag":484,"props":2940,"children":2941},{"style":508},[2942],{"type":33,"value":2943},"})\n",{"type":27,"tag":35,"props":2945,"children":2946},{},[2947],{"type":33,"value":2948},"有了这组信息后，你才能真正回答：",{"type":27,"tag":41,"props":2950,"children":2951},{},[2952,2957,2962,2967],{"type":27,"tag":45,"props":2953,"children":2954},{},[2955],{"type":33,"value":2956},"该优化哪一条 prompt",{"type":27,"tag":45,"props":2958,"children":2959},{},[2960],{"type":33,"value":2961},"哪个功能需要缓存",{"type":27,"tag":45,"props":2963,"children":2964},{},[2965],{"type":33,"value":2966},"哪个租户需要配额",{"type":27,"tag":45,"props":2968,"children":2969},{},[2970],{"type":33,"value":2971},"哪个模型是否该降级替换",{"type":27,"tag":171,"props":2973,"children":2974},{},[],{"type":27,"tag":175,"props":2976,"children":2978},{"id":2977},"十什么时候不该一味省-token",[2979],{"type":33,"value":2980},"十、什么时候不该一味省 token",{"type":27,"tag":35,"props":2982,"children":2983},{},[2984],{"type":33,"value":2985},"这点很重要。",{"type":27,"tag":35,"props":2987,"children":2988},{},[2989],{"type":33,"value":2990},"不是所有降本都值得做。",{"type":27,"tag":233,"props":2992,"children":2994},{"id":2993},"_101-不值得过度压缩的情况",[2995],{"type":33,"value":2996},"10.1 不值得过度压缩的情况",{"type":27,"tag":41,"props":2998,"children":2999},{},[3000,3005,3010,3015],{"type":27,"tag":45,"props":3001,"children":3002},{},[3003],{"type":33,"value":3004},"高价值用户的关键问答",{"type":27,"tag":45,"props":3006,"children":3007},{},[3008],{"type":33,"value":3009},"结果质量直接影响转化",{"type":27,"tag":45,"props":3011,"children":3012},{},[3013],{"type":33,"value":3014},"复杂推理场景本来就需要更多上下文",{"type":27,"tag":45,"props":3016,"children":3017},{},[3018],{"type":33,"value":3019},"过度缩短输出会让结果不可用",{"type":27,"tag":233,"props":3021,"children":3023},{"id":3022},"_102-一个成熟团队会这样看成本",[3024],{"type":33,"value":3025},"10.2 一个成熟团队会这样看成本",{"type":27,"tag":35,"props":3027,"children":3028},{},[3029],{"type":33,"value":3030},"不是只看“每月花多少钱”，而是看：",{"type":27,"tag":114,"props":3032,"children":3033},{},[3034],{"type":27,"tag":35,"props":3035,"children":3036},{},[3037],{"type":33,"value":3038},"这笔成本和获得的业务价值是否匹配。",{"type":27,"tag":35,"props":3040,"children":3041},{},[3042],{"type":33,"value":3043},"如果一个 AI 功能：",{"type":27,"tag":41,"props":3045,"children":3046},{},[3047,3052,3057,3062],{"type":27,"tag":45,"props":3048,"children":3049},{},[3050],{"type":33,"value":3051},"提升了留存",{"type":27,"tag":45,"props":3053,"children":3054},{},[3055],{"type":33,"value":3056},"节省了人工审核",{"type":27,"tag":45,"props":3058,"children":3059},{},[3060],{"type":33,"value":3061},"降低了客服成本",{"type":27,"tag":45,"props":3063,"children":3064},{},[3065],{"type":33,"value":3066},"提高了付费转化",{"type":27,"tag":35,"props":3068,"children":3069},{},[3070],{"type":33,"value":3071},"那它贵一点也可能是划算的。",{"type":27,"tag":35,"props":3073,"children":3074},{},[3075],{"type":33,"value":3076},"真正糟糕的是：",{"type":27,"tag":41,"props":3078,"children":3079},{},[3080,3085,3090,3095],{"type":27,"tag":45,"props":3081,"children":3082},{},[3083],{"type":33,"value":3084},"成本高",{"type":27,"tag":45,"props":3086,"children":3087},{},[3088],{"type":33,"value":3089},"用户感知弱",{"type":27,"tag":45,"props":3091,"children":3092},{},[3093],{"type":33,"value":3094},"指标无提升",{"type":27,"tag":45,"props":3096,"children":3097},{},[3098],{"type":33,"value":3099},"团队还说不清原因",{"type":27,"tag":171,"props":3101,"children":3102},{},[],{"type":27,"tag":175,"props":3104,"children":3106},{"id":3105},"十一一套可执行的-ai-成本治理顺序",[3107],{"type":33,"value":3108},"十一、一套可执行的 AI 成本治理顺序",{"type":27,"tag":233,"props":3110,"children":3112},{"id":3111},"第-1-步先打-usage-日志",[3113],{"type":33,"value":3114},"第 1 步：先打 usage 日志",{"type":27,"tag":35,"props":3116,"children":3117},{},[3118],{"type":33,"value":3119},"没有 usage，后面所有优化都是猜。",{"type":27,"tag":233,"props":3121,"children":3123},{"id":3122},"第-2-步看输入输出和重试结构",[3124],{"type":33,"value":3125},"第 2 步：看输入、输出和重试结构",{"type":27,"tag":35,"props":3127,"children":3128},{},[3129],{"type":33,"value":3130},"先确认钱到底花在哪。",{"type":27,"tag":233,"props":3132,"children":3134},{"id":3133},"第-3-步先做上下文裁剪和输出约束",[3135],{"type":33,"value":3136},"第 3 步：先做上下文裁剪和输出约束",{"type":27,"tag":35,"props":3138,"children":3139},{},[3140],{"type":33,"value":3141},"这是最便宜、收益最高的优化。",{"type":27,"tag":233,"props":3143,"children":3145},{"id":3144},"第-4-步再做模型分层和缓存复用",[3146],{"type":33,"value":3147},"第 4 步：再做模型分层和缓存复用",{"type":27,"tag":35,"props":3149,"children":3150},{},[3151],{"type":33,"value":3152},"把高成本调用放到最有价值的地方。",{"type":27,"tag":233,"props":3154,"children":3156},{"id":3155},"第-5-步最后补预算和自动保护",[3157],{"type":33,"value":3158},"第 5 步：最后补预算和自动保护",{"type":27,"tag":35,"props":3160,"children":3161},{},[3162],{"type":33,"value":3163},"让成本异常能自动被发现，必要时自动限流或降级。",{"type":27,"tag":171,"props":3165,"children":3166},{},[],{"type":27,"tag":175,"props":3168,"children":3170},{"id":3169},"十二给团队的-token-管理检查清单",[3171],{"type":33,"value":3172},"十二、给团队的 Token 管理检查清单",{"type":27,"tag":233,"props":3174,"children":3176},{"id":3175},"观测层",[3177],{"type":33,"value":3175},{"type":27,"tag":41,"props":3179,"children":3182},{"className":3180},[3181],"contains-task-list",[3183,3194,3203],{"type":27,"tag":45,"props":3184,"children":3187},{"className":3185},[3186],"task-list-item",[3188,3192],{"type":27,"tag":2068,"props":3189,"children":3191},{"disabled":18,"type":3190},"checkbox",[],{"type":33,"value":3193}," 是否记录了输入、输出 token 和估算成本",{"type":27,"tag":45,"props":3195,"children":3197},{"className":3196},[3186],[3198,3201],{"type":27,"tag":2068,"props":3199,"children":3200},{"disabled":18,"type":3190},[],{"type":33,"value":3202}," 是否能按功能、模型、租户做归因",{"type":27,"tag":45,"props":3204,"children":3206},{"className":3205},[3186],[3207,3210],{"type":27,"tag":2068,"props":3208,"children":3209},{"disabled":18,"type":3190},[],{"type":33,"value":3211}," 是否有日级和周级趋势图",{"type":27,"tag":233,"props":3213,"children":3215},{"id":3214},"优化层",[3216],{"type":33,"value":3214},{"type":27,"tag":41,"props":3218,"children":3220},{"className":3219},[3181],[3221,3230,3239,3248,3257],{"type":27,"tag":45,"props":3222,"children":3224},{"className":3223},[3186],[3225,3228],{"type":27,"tag":2068,"props":3226,"children":3227},{"disabled":18,"type":3190},[],{"type":33,"value":3229}," 是否裁剪历史上下文",{"type":27,"tag":45,"props":3231,"children":3233},{"className":3232},[3186],[3234,3237],{"type":27,"tag":2068,"props":3235,"children":3236},{"disabled":18,"type":3190},[],{"type":33,"value":3238}," 是否限制输出长度和结构",{"type":27,"tag":45,"props":3240,"children":3242},{"className":3241},[3186],[3243,3246],{"type":27,"tag":2068,"props":3244,"children":3245},{"disabled":18,"type":3190},[],{"type":33,"value":3247}," 是否避免无意义重试",{"type":27,"tag":45,"props":3249,"children":3251},{"className":3250},[3186],[3252,3255],{"type":27,"tag":2068,"props":3253,"children":3254},{"disabled":18,"type":3190},[],{"type":33,"value":3256}," 是否对高复用结果做缓存",{"type":27,"tag":45,"props":3258,"children":3260},{"className":3259},[3186],[3261,3264],{"type":27,"tag":2068,"props":3262,"children":3263},{"disabled":18,"type":3190},[],{"type":33,"value":3265}," 是否按任务类型做模型分层",{"type":27,"tag":233,"props":3267,"children":3269},{"id":3268},"保护层",[3270],{"type":33,"value":3268},{"type":27,"tag":41,"props":3272,"children":3274},{"className":3273},[3181],[3275,3284,3293,3302],{"type":27,"tag":45,"props":3276,"children":3278},{"className":3277},[3186],[3279,3282],{"type":27,"tag":2068,"props":3280,"children":3281},{"disabled":18,"type":3190},[],{"type":33,"value":3283}," 是否有预算告警",{"type":27,"tag":45,"props":3285,"children":3287},{"className":3286},[3186],[3288,3291],{"type":27,"tag":2068,"props":3289,"children":3290},{"disabled":18,"type":3190},[],{"type":33,"value":3292}," 是否有超预算时的降级策略",{"type":27,"tag":45,"props":3294,"children":3296},{"className":3295},[3186],[3297,3300],{"type":27,"tag":2068,"props":3298,"children":3299},{"disabled":18,"type":3190},[],{"type":33,"value":3301}," 是否有限流或配额控制",{"type":27,"tag":45,"props":3303,"children":3305},{"className":3304},[3186],[3306,3309],{"type":27,"tag":2068,"props":3307,"children":3308},{"disabled":18,"type":3190},[],{"type":33,"value":3310}," 是否能快速定位异常成本上涨的版本或功能",{"type":27,"tag":171,"props":3312,"children":3313},{},[],{"type":27,"tag":175,"props":3315,"children":3317},{"id":3316},"总结",[3318],{"type":33,"value":3316},{"type":27,"tag":35,"props":3320,"children":3321},{},[3322],{"type":33,"value":3323},"把 Token 管理与成本控制讲透，可以收敛成 5 句话：",{"type":27,"tag":137,"props":3325,"children":3326},{},[3327,3335,3343,3351,3359],{"type":27,"tag":45,"props":3328,"children":3329},{},[3330],{"type":27,"tag":121,"props":3331,"children":3332},{},[3333],{"type":33,"value":3334},"多数 AI 成本问题，根源不是模型贵，而是调用方式浪费。",{"type":27,"tag":45,"props":3336,"children":3337},{},[3338],{"type":27,"tag":121,"props":3339,"children":3340},{},[3341],{"type":33,"value":3342},"先建立单次、单功能、单用户的成本视角，再谈优化。",{"type":27,"tag":45,"props":3344,"children":3345},{},[3346],{"type":27,"tag":121,"props":3347,"children":3348},{},[3349],{"type":33,"value":3350},"最值钱的降本手段通常是裁剪上下文、约束输出、减少重复调用。",{"type":27,"tag":45,"props":3352,"children":3353},{},[3354],{"type":27,"tag":121,"props":3355,"children":3356},{},[3357],{"type":33,"value":3358},"模型分层和缓存复用，是让成本和体验同时可控的关键。",{"type":27,"tag":45,"props":3360,"children":3361},{},[3362],{"type":27,"tag":121,"props":3363,"children":3364},{},[3365],{"type":33,"value":3366},"预算、告警、归因必须上线前就准备，不要等账单教育团队。",{"type":27,"tag":35,"props":3368,"children":3369},{},[3370],{"type":33,"value":3371},"如果你只记住一句话，我希望是这一句：",{"type":27,"tag":114,"props":3373,"children":3374},{},[3375],{"type":27,"tag":35,"props":3376,"children":3377},{},[3378],{"type":33,"value":3379},"成本控制不是把 AI 变便宜，而是让每一个 token 都花在真正值钱的地方。",{"type":27,"tag":35,"props":3381,"children":3382},{},[3383],{"type":33,"value":3384},"否则产品上线得越顺，月底最先变得不顺的通常不是接口，而是——",{"type":27,"tag":35,"props":3386,"children":3387},{},[3388],{"type":27,"tag":121,"props":3389,"children":3390},{},[3391],{"type":33,"value":3392},"财务同学的表情。",{"type":27,"tag":3394,"props":3395,"children":3396},"style",{},[3397],{"type":33,"value":3398},"html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}",{"title":7,"searchDepth":534,"depth":534,"links":3400},[3401,3405,3410,3419,3426,3431,3436,3443,3448,3452,3456,3463,3468],{"id":177,"depth":20,"text":180,"children":3402},[3403,3404],{"id":235,"depth":534,"text":238},{"id":274,"depth":534,"text":277},{"id":332,"depth":20,"text":335,"children":3406},[3407,3408,3409],{"id":348,"depth":534,"text":351},{"id":434,"depth":534,"text":437},{"id":468,"depth":534,"text":471},{"id":808,"depth":20,"text":811,"children":3411},[3412,3418],{"id":819,"depth":534,"text":822,"children":3413},[3414,3415,3416,3417],{"id":826,"depth":551,"text":829},{"id":855,"depth":551,"text":858},{"id":889,"depth":551,"text":892},{"id":923,"depth":551,"text":926},{"id":957,"depth":534,"text":960},{"id":1058,"depth":20,"text":1061,"children":3420},[3421,3424,3425],{"id":1064,"depth":534,"text":1067,"children":3422},[3423],{"id":1075,"depth":551,"text":1075},{"id":1103,"depth":534,"text":1106},{"id":1524,"depth":534,"text":1527},{"id":1571,"depth":20,"text":1574,"children":3427},[3428,3429,3430],{"id":1582,"depth":534,"text":1585},{"id":1666,"depth":534,"text":1669},{"id":1700,"depth":534,"text":1703},{"id":1918,"depth":20,"text":1921,"children":3432},[3433,3434,3435],{"id":1929,"depth":534,"text":1932},{"id":1963,"depth":534,"text":1966},{"id":1987,"depth":534,"text":1990},{"id":2307,"depth":20,"text":2310,"children":3437},[3438,3439],{"id":2313,"depth":534,"text":2316},{"id":2342,"depth":534,"text":2345,"children":3440},[3441,3442],{"id":2348,"depth":551,"text":2351},{"id":2359,"depth":551,"text":2362},{"id":2499,"depth":20,"text":2502,"children":3444},[3445,3446,3447],{"id":2505,"depth":534,"text":2508},{"id":2572,"depth":534,"text":2575},{"id":2596,"depth":534,"text":2599},{"id":2638,"depth":20,"text":2641,"children":3449},[3450,3451],{"id":2672,"depth":534,"text":2675},{"id":2761,"depth":534,"text":2764},{"id":2977,"depth":20,"text":2980,"children":3453},[3454,3455],{"id":2993,"depth":534,"text":2996},{"id":3022,"depth":534,"text":3025},{"id":3105,"depth":20,"text":3108,"children":3457},[3458,3459,3460,3461,3462],{"id":3111,"depth":534,"text":3114},{"id":3122,"depth":534,"text":3125},{"id":3133,"depth":534,"text":3136},{"id":3144,"depth":534,"text":3147},{"id":3155,"depth":534,"text":3158},{"id":3169,"depth":20,"text":3172,"children":3464},[3465,3466,3467],{"id":3175,"depth":534,"text":3175},{"id":3214,"depth":534,"text":3214},{"id":3268,"depth":534,"text":3268},{"id":3316,"depth":20,"text":3316},"markdown","content:topics:engineering:token-management-cost-control-guide.md","topics/engineering/token-management-cost-control-guide.md","topics/engineering/token-management-cost-control-guide","md",1777109948273]