> ## Documentation Index
> Fetch the complete documentation index at: https://docs.sglang.io/llms.txt
> Use this file to discover all available pages before exploring further.

# DeepSeek-V4

> Deploy DeepSeek-V4 with SGLang — verified launch commands, benchmarks, and tuning for the Flash (284B) and Pro (1.6T) Mixture-of-Experts models.

export const Playground = ({config}) => {
  if (!config) {
    return <div style={{
      padding: 12,
      color: "#b91c1c"
    }}>Playground: missing <code>config</code> prop</div>;
  }
  // Selection keys that findCell compares to identify one deployment cell.
  const DIMENSIONS = ["hw", "variant", "quant", "strategy", "nodes"];
  // NOTE(review): presumably the browser-storage key for user-entered env values — not referenced in this part of the file; confirm.
  const STORAGE_KEY = "sglang-deploy-env";
  // Per-axis playground feature config, keyed by axis id; empty object when the config declares none.
  const pgFeatures = config.playgroundFeatures || ({});
  // Fixed ports used by the pdDisagg axis: `serve` replaces an existing --port flag,
  // `dist` is used for --dist-init-addr on single-node selections.
  const PD_PORTS = {
    prefill: {
      serve: 30000,
      dist: 30335
    },
    decode: {
      serve: 30001,
      dist: 30435
    }
  };
  // Returns the first cell whose `match` agrees with the selection on every dimension (undefined when none does).
  const findCell = (cells, sel) => {
    const agreesOnAllDimensions = cell => DIMENSIONS.every(dim => cell.match[dim] === sel[dim]);
    return cells.find(agreesOnAllDimensions);
  };
  /**
   * Finds the first cell that matches the selection on hw/variant/quant/nodes
   * (strategy is deliberately not compared) and whose flags and env equal the
   * playground's current output.
   *
   * @param {Array<object>} cells - candidate cells, each with `match` and optional `flags` / `env`.
   * @param {object} sel - current selection.
   * @param {string[]|null|undefined} pgEnv - env entries to compare; missing is treated as empty.
   * @param {string[]|null|undefined} pgFlags - flags to compare; missing is treated as empty.
   * @returns {object|null} the matching cell, or null.
   */
  const findMatchingCell = (cells, sel, pgEnv, pgFlags) => {
    // Flags are order-sensitive.
    const flagsEq = (a, b) => a.length === b.length && a.every((x, i) => x === b[i]);
    // Env is order-insensitive. Occurrences are counted so duplicates cannot make two
    // different lists compare equal (a plain Set check accepted ["A","B"] vs ["A","A"]).
    const envEq = (a, b) => {
      if (a.length !== b.length) return false;
      const remaining = new Map();
      for (const x of a) remaining.set(x, (remaining.get(x) || 0) + 1);
      for (const x of b) {
        const left = remaining.get(x);
        if (!left) return false;
        remaining.set(x, left - 1);
      }
      return true;
    };
    for (const c of cells) {
      if (c.match.hw !== sel.hw) continue;
      if (c.match.variant !== sel.variant) continue;
      if (c.match.quant !== sel.quant) continue;
      if (c.match.nodes !== sel.nodes) continue;
      if (flagsEq(c.flags || [], pgFlags || []) && envEq(c.env || [], pgEnv || [])) {
        return c;
      }
    }
    return null;
  };
  // Looks up the model name for a selection: the hw-specific "hw|variant|quant" key wins,
  // then the generic "variant|quant" key, then an empty string.
  const resolveModelName = sel => {
    const names = config.modelNames;
    const hwSpecific = names[`${sel.hw}|${sel.variant}|${sel.quant}`];
    if (hwSpecific !== null && hwSpecific !== undefined) return hwSpecific;
    const generic = names[`${sel.variant}|${sel.quant}`];
    return generic ?? "";
  };
  /**
   * Replaces `{{KEY}}` placeholders in `text`.
   * `{{MODEL_NAME}}` becomes `modelName`; any other key is taken from `env`.
   * Placeholders with no value are left untouched.
   *
   * @param {string} text - template text.
   * @param {object|null|undefined} env - placeholder values keyed by name.
   * @param {string} modelName - value substituted for MODEL_NAME.
   * @returns {string} the interpolated text.
   */
  const interpolate = (text, env, modelName) => text.replace(/{{(\w+)}}/g, (placeholder, key) => {
    if (key === "MODEL_NAME") return modelName;
    // Only own properties count: `{{constructor}}` or `{{toString}}` must not resolve to
    // Object.prototype members, and a missing env must not throw.
    const hasValue = env !== null && env !== undefined && Object.prototype.hasOwnProperty.call(env, key);
    return hasValue ? env[key] ?? placeholder : placeholder;
  });
  // Maps a nodes id to a node count: "multi-<N>" yields N, everything else (including "single") yields 1.
  const parseNnodes = id => {
    const multi = (/^multi-(\d+)$/).exec(id);
    if (id === "single" || multi === null) return 1;
    return parseInt(multi[1], 10);
  };
  // Builds a { name: default } map from a placeholder schema; entries without a default get "".
  const placeholderDefaults = schema => {
    const defaults = {};
    const source = schema || ({});
    Object.keys(source).forEach(name => {
      defaults[name] = source[name].default ?? "";
    });
    return defaults;
  };
  // True when `constraint` is a non-empty object and, for every key, its value is an array
  // containing base[key]. Anything else (missing, empty, non-array value) is false.
  const matchConstraint = (base, constraint) => {
    const isObject = Boolean(constraint) && typeof constraint === "object";
    if (!isObject) return false;
    const keys = Object.keys(constraint);
    if (keys.length === 0) return false;
    for (const key of keys) {
      const allowed = constraint[key];
      if (!Array.isArray(allowed) || !allowed.includes(base[key])) return false;
    }
    return true;
  };
  // Normalises an option entry into chip state. Primitive entries become a bare, enabled chip.
  // Object entries keep all their fields and gain: value (id wins over value), label,
  // hidden (from `hide` constraint), disabled (literal flag or `disable` constraint) and disableReason.
  const evaluateChip = (entry, base) => {
    const isPrimitive = entry === null || typeof entry !== "object";
    if (isPrimitive) {
      return {
        value: entry,
        label: undefined,
        hidden: false,
        disabled: false,
        disableReason: ""
      };
    }
    const {hide, disable} = entry;
    const hidden = hide ? matchConstraint(base, hide) : false;
    const alwaysDisabled = entry.disabled === true || disable === true;
    const hasDisableRule = Boolean(disable) && typeof disable === "object";
    const disabled = alwaysDisabled || (hasDisableRule && matchConstraint(base, disable));
    const value = entry.id !== undefined ? entry.id : entry.value;
    return {
      ...entry,
      value,
      label: entry.label,
      hidden,
      disabled,
      disableReason: entry.disableReason || ""
    };
  };
  // Returns the entry whose value equals `picked`, or null. A primitive entry is its own value;
  // an object entry's value is its `id` when defined, otherwise its `value`.
  const findEntry = (entries, picked) => {
    const valueOf = entry => {
      if (entry === null || typeof entry !== "object") return entry;
      return entry.id !== undefined ? entry.id : entry.value;
    };
    const list = entries || [];
    const at = list.findIndex(entry => valueOf(entry) === picked);
    return at === -1 ? null : list[at];
  };
  // True when the entry for `picked` exists and is hidden under `base`; an unknown value is never hidden.
  const isHidden = (entries, picked, base) => {
    const entry = findEntry(entries, picked);
    const missing = entry === null || entry === undefined;
    return missing ? false : evaluateChip(entry, base).hidden;
  };
  // Drops every flag whose first token (text before the first whitespace or "=") is listed in `prefixes`.
  const stripFlagsByFirstToken = (flags, prefixes) => {
    const banned = new Set(prefixes);
    const headOf = flag => flag.split(/[\s=]/)[0];
    return flags.filter(flag => !banned.has(headOf(flag)));
  };
  // Drops env entries whose variable name (text before the first "=") is listed in `prefixes`.
  // With no prefixes the input list itself is returned.
  const stripEnvByPrefix = (envList, prefixes) => {
    const nothingToStrip = !prefixes || !prefixes.length;
    if (nothingToStrip) return envList;
    const names = new Set(prefixes);
    return envList.filter(entry => {
      const [name] = entry.split("=");
      return !names.has(name);
    });
  };
  // Returns a copy of `flags` with `additions` placed immediately before the first flag that
  // starts with "--host", or appended at the end when there is none.
  const insertBeforeTail = (flags, additions) => {
    const hostAt = flags.findIndex(flag => flag.startsWith("--host"));
    if (hostAt === -1) return [...flags, ...additions];
    return [...flags.slice(0, hostAt), ...additions, ...flags.slice(hostAt)];
  };
  // Returns a copy of `flags` with `additions` placed right after an anchor flag.
  // Anchors are tried in the order given (matched on first token); if none is present the first
  // flag starting with "--model-path" is used, and failing that the additions go to the front.
  const insertAfter = (flags, afterAnyOf, additions) => {
    const heads = flags.map(flag => flag.split(/[\s=]/)[0]);
    let anchorAt = -1;
    for (const anchor of afterAnyOf) {
      anchorAt = heads.indexOf(anchor);
      if (anchorAt !== -1) break;
    }
    if (anchorAt === -1) {
      anchorAt = flags.findIndex(flag => flag.startsWith("--model-path"));
    }
    const cut = anchorAt + 1;
    return [...flags.slice(0, cut), ...additions, ...flags.slice(cut)];
  };
  // Returns the integer argument of the first flag named `prefix` that has a parseable one
  // ("--tp 8" or "--tp=8"), or null when there is none.
  const parseIntFlag = (flags, prefix) => {
    for (const flag of flags || []) {
      const [head] = flag.split(/[\s=]/);
      if (head === prefix) {
        const arg = flag.slice(prefix.length).replace(/^[\s=]+/, "");
        const parsed = parseInt(arg, 10);
        if (!Number.isNaN(parsed)) return parsed;
      }
    }
    return null;
  };
  // True when some flag's first token (before whitespace or "=") is exactly `name`.
  const hasFlag = (flags, name) => {
    for (const flag of flags || []) {
      const [head] = flag.split(/[\s=]/);
      if (head === name) return true;
    }
    return false;
  };
  // Returns the argument text of the first flag named `prefix`, or null when the flag is
  // absent or carries no argument. Only the first occurrence is considered.
  const findFlagArg = (flags, prefix) => {
    const hit = (flags || []).find(flag => flag.split(/[\s=]/)[0] === prefix);
    if (hit === undefined) return null;
    const arg = hit.slice(prefix.length).replace(/^[\s=]+/, "");
    return arg.length > 0 ? arg : null;
  };
  // Anchor lists for insertAfter: flag names tried in order, the first one present wins.
  // Each list ends with --model-path as the last resort.
  const ANCHOR_NEAR_MODEL_PATH = ["--model-path"];
  const ANCHOR_NEAR_TP = ["--tp", "--model-path"];
  const ANCHOR_NEAR_DP = ["--dp", "--tp", "--model-path"];
  const ANCHOR_NEAR_DPATTN = ["--enable-dp-attention", "--dp", "--tp", "--model-path"];
  const ANCHOR_NEAR_MOE = ["--moe-a2a-backend", "--moe-runner-backend", "--enable-dp-attention", "--dp", "--tp", "--model-path"];
  // Bundle of flag/env utilities and anchor lists handed to every axis handler as `h`.
  const helpers = {
    matchConstraint,
    evaluateChip,
    findEntry,
    isHidden,
    stripFlagsByFirstToken,
    stripEnvByPrefix,
    insertBeforeTail,
    insertAfter,
    parseIntFlag,
    hasFlag,
    findFlagArg,
    ANCHOR_NEAR_MODEL_PATH,
    ANCHOR_NEAR_TP,
    ANCHOR_NEAR_DP,
    ANCHOR_NEAR_DPATTN,
    ANCHOR_NEAR_MOE
  };
  const AXIS_HANDLERS = {
    attention: {
      initState: () => ({
        tp: null,
        cp: null,
        dpAttn: null
      }),
      deriveFromBase: (cell, fc, h) => {
        const flags = cell && cell.flags || [];
        const dpVal = h.parseIntFlag(flags, "--dp");
        const hasDpAttn = h.hasFlag(flags, "--enable-dp-attention");
        let dpAttn;
        if (dpVal !== null) dpAttn = dpVal; else if (hasDpAttn) dpAttn = 1; else dpAttn = false;
        return {
          tp: h.parseIntFlag(flags, "--tp"),
          cp: h.hasFlag(flags, "--enable-nsa-prefill-context-parallel") ? 2 : null,
          dpAttn
        };
      },
      revertHidden: (value, fc, base, h) => {
        let changed = false;
        const next = {
          ...value
        };
        for (const knob of fc.knobs || []) {
          const cur = next[knob.id];
          if (cur !== null && cur !== undefined && h.isHidden(knob.values, cur, base)) {
            next[knob.id] = null;
            changed = true;
          }
        }
        return changed ? next : value;
      },
      apply: ({flags, env, value, h}) => {
        if (value.tp !== null) {
          flags = h.stripFlagsByFirstToken(flags, ["--tp"]);
          flags = h.insertAfter(flags, h.ANCHOR_NEAR_MODEL_PATH, [`--tp ${value.tp}`]);
        }
        if (value.dpAttn !== null && value.dpAttn !== undefined) {
          flags = h.stripFlagsByFirstToken(flags, ["--dp", "--enable-dp-attention"]);
          if (typeof value.dpAttn === "number" && value.dpAttn > 0) {
            flags = h.insertAfter(flags, h.ANCHOR_NEAR_TP, [`--dp ${value.dpAttn}`, "--enable-dp-attention"]);
          }
        }
        if (value.cp !== null) {
          flags = h.stripFlagsByFirstToken(flags, ["--enable-nsa-prefill-context-parallel", "--nsa-prefill-cp-mode"]);
          if (value.cp > 1) {
            flags = h.insertAfter(flags, h.ANCHOR_NEAR_DPATTN, ["--enable-nsa-prefill-context-parallel", "--nsa-prefill-cp-mode round-robin-split"]);
          }
        }
        return {
          flags,
          env
        };
      },
      render: ({axisId, value, setValue, fc, base, s, renderSelect, derived}) => {
        const knobs = fc.knobs || [];
        if (!knobs.length) return null;
        const setKnob = (k, v) => setValue({
          ...value,
          [k]: v
        });
        const labelFor = knob => c => {
          if (c.label !== undefined) return c.label;
          if (knob.id === "dpAttn") {
            const labelMap = knob.labels || ({
              "auto": "Auto",
              "false": "Off"
            });
            const k = c.value === null ? "auto" : String(c.value);
            return labelMap[k] || k;
          }
          return c.value === null ? "Auto" : String(c.value);
        };
        const knobDisplay = knob => {
          const v = value[knob.id];
          if (v !== null && v !== undefined) return v;
          if (derived && derived[knob.id] !== undefined) return derived[knob.id];
          return null;
        };
        const hideNullFor = knob => {
          const d = derived ? derived[knob.id] : null;
          return d !== null && d !== undefined ? [null] : [];
        };
        return <div key={axisId} style={s.card}>
            <div style={s.compactRow}>
              <span style={s.axisTitle}>Attention</span>
              {knobs.map(knob => <span key={knob.id} style={s.field}>
                  <span style={s.fieldLabel}>{knob.label || knob.id.toUpperCase()}</span>
                  {renderSelect(knobDisplay(knob), knob.values || [null], nv => setKnob(knob.id, nv), base, labelFor(knob), {
          hideValues: hideNullFor(knob)
        })}
                </span>)}
            </div>
          </div>;
      }
    },
    moe: {
      initState: () => ({
        backend: null,
        ep: null,
        mmQuant: null
      }),
      deriveFromBase: (cell, fc, h) => {
        const flags = cell && cell.flags || [];
        const baseEnv = cell && cell.env || [];
        const a2a = h.findFlagArg(flags, "--moe-a2a-backend");
        const runner = h.findFlagArg(flags, "--moe-runner-backend");
        const fp4Acts = baseEnv.some(e => e.startsWith("SGLANG_OPT_DEEPGEMM_MEGA_MOE_USE_FP4_ACTS"));
        return {
          backend: a2a || runner || null,
          ep: h.parseIntFlag(flags, "--ep"),
          mmQuant: fp4Acts ? "w4a4" : "w4a8"
        };
      },
      revertHidden: (value, fc, base, h) => {
        let changed = false;
        const next = {
          ...value
        };
        if (next.backend !== null && fc.backend?.options && h.isHidden(fc.backend.options, next.backend, base)) {
          next.backend = null;
          changed = true;
        }
        const mmOpt = (fc.backend?.options || []).find(o => o.id === "megamoe");
        const mmAvail = !!mmOpt && (!mmOpt.requiresHw || mmOpt.requiresHw.includes(base.hw)) && (!mmOpt.excludesStrategy || !mmOpt.excludesStrategy.includes(base.strategy));
        if (next.backend === "megamoe" && !mmAvail) {
          next.backend = null;
          changed = true;
        }
        if (next.ep !== null && fc.ep?.values && h.isHidden(fc.ep.values, next.ep, base)) {
          next.ep = null;
          changed = true;
        }
        return changed ? next : value;
      },
      apply: ({flags, env, value, fc, h, derived}) => {
        if (value.backend !== null) {
          flags = h.stripFlagsByFirstToken(flags, ["--moe-a2a-backend", "--moe-runner-backend"]);
          const opt = (fc.backend?.options || []).find(o => o.id === value.backend);
          if (opt?.flags?.length) {
            flags = h.insertAfter(flags, h.ANCHOR_NEAR_DPATTN, opt.flags);
          }
        }
        const mq = fc.megamoeQuant;
        if (mq) {
          const quantKeys = [];
          for (const o of mq.options || []) {
            for (const e of o.env || []) quantKeys.push(e.split("=")[0]);
          }
          const effBackend = value.backend !== null ? value.backend : derived && derived.backend;
          if (effBackend === "megamoe") {
            env = h.stripEnvByPrefix(env, [...mq.stripEnv || [], ...quantKeys]);
            const quant = value.mmQuant != null ? value.mmQuant : derived && derived.mmQuant || "w4a8";
            const opt = (mq.options || []).find(o => o.id === quant);
            if (opt?.env?.length) env = [...env, ...opt.env];
          } else if (value.backend !== null) {
            env = h.stripEnvByPrefix(env, quantKeys);
          }
        }
        if (value.ep !== null) {
          flags = h.stripFlagsByFirstToken(flags, ["--ep"]);
          if (value.ep > 1) {
            flags = h.insertAfter(flags, h.ANCHOR_NEAR_MOE, [`--ep ${value.ep}`]);
          }
        }
        return {
          flags,
          env
        };
      },
      render: ({axisId, value, setValue, fc, base, s, renderSelect, derived}) => {
        if (!fc.backend && !fc.ep) return null;
        const setSlot = (k, v) => setValue({
          ...value,
          [k]: v
        });
        const slotDisplay = k => {
          const v = value[k];
          if (v !== null && v !== undefined) return v;
          if (derived && derived[k] !== undefined) return derived[k];
          return null;
        };
        const hideNull = k => {
          const d = derived ? derived[k] : null;
          return d !== null && d !== undefined ? [null] : [];
        };
        const mmOpt = (fc.backend?.options || []).find(o => o.id === "megamoe");
        const mmAvail = !!mmOpt && (!mmOpt.requiresHw || mmOpt.requiresHw.includes(base.hw)) && (!mmOpt.excludesStrategy || !mmOpt.excludesStrategy.includes(base.strategy));
        const backendIsMega = slotDisplay("backend") === "megamoe";
        return <div key={axisId} style={s.card}>
            <div style={s.compactRow}>
              <span style={s.axisTitle}>MoE</span>
              {fc.backend && <span style={s.field}>
                  <span style={s.fieldLabel}>Backend</span>
                  {renderSelect(slotDisplay("backend"), fc.backend.options || [], v => setSlot("backend", v), base, undefined, {
          hideValues: [...hideNull("backend"), ...mmAvail ? [] : ["megamoe"]]
        })}
                </span>}
              {fc.megamoeQuant && backendIsMega && <span style={s.field}>
                  <span style={s.fieldLabel}>Quantization</span>
                  {renderSelect(value.mmQuant != null ? value.mmQuant : derived && derived.mmQuant || "w4a8", fc.megamoeQuant.options || [], v => setSlot("mmQuant", v), base)}
                </span>}
              {fc.ep && <span style={s.field}>
                  <span style={s.fieldLabel}>{fc.ep.label || "EP"}</span>
                  {renderSelect(slotDisplay("ep"), fc.ep.values || [null], v => setSlot("ep", v), base, undefined, {
          hideValues: hideNull("ep")
        })}
                </span>}
            </div>
          </div>;
      }
    },
    parsers: {
      initState: fc => {
        const out = {};
        for (const item of fc.items || []) out[item.id] = null;
        return out;
      },
      deriveFromBase: (cell, fc, h) => {
        const flags = cell && cell.flags || [];
        const out = {};
        for (const item of fc.items || []) {
          const prefix = item.flag.split(/[\s=]/)[0];
          out[item.id] = h.hasFlag(flags, prefix);
        }
        return out;
      },
      revertHidden: (value, fc, base, h) => {
        let changed = false;
        const next = {
          ...value
        };
        for (const item of fc.items || []) {
          if (next[item.id] !== null && next[item.id] !== undefined && h.evaluateChip(item, base).hidden) {
            next[item.id] = null;
            changed = true;
          }
        }
        return changed ? next : value;
      },
      apply: ({flags, env, value, fc, h, derived}) => {
        const items = fc.items || [];
        const eff = {};
        const baseOf = {};
        for (const item of items) {
          baseOf[item.id] = derived ? !!derived[item.id] : false;
          const v = value[item.id];
          eff[item.id] = v === null || v === undefined ? baseOf[item.id] : v;
        }
        const anyOverride = items.some(it => eff[it.id] !== baseOf[it.id]);
        if (!anyOverride) return {
          flags,
          env
        };
        flags = h.stripFlagsByFirstToken(flags, ["--reasoning-parser", "--tool-call-parser"]);
        const adds = [];
        for (const item of items) {
          if (eff[item.id]) adds.push(item.flag);
        }
        if (adds.length) flags = h.insertBeforeTail(flags, adds);
        return {
          flags,
          env
        };
      },
      render: ({axisId, value, setValue, fc, base, s, h, renderChip, derived}) => {
        const visible = (fc.items || []).map(item => ({
          item,
          c: h.evaluateChip(item, base)
        })).filter(({c}) => !c.hidden);
        if (visible.length === 0) return null;
        const effOn = id => {
          const v = value[id];
          if (v !== null && v !== undefined) return v;
          if (derived && derived[id] !== undefined) return derived[id];
          return false;
        };
        return <div key={axisId} style={s.card}>
            <div style={s.compactRow}>
              <span style={s.axisTitle}>Parsers</span>
              {visible.map(({item, c}) => <span key={item.id} style={s.field}>
                  {renderChip(item.label, effOn(item.id), true, () => setValue({
          ...value,
          [item.id]: !effOn(item.id)
        }), {
          disabled: c.disabled,
          disabledReason: c.disableReason
        })}
                </span>)}
            </div>
          </div>;
      }
    },
    speculative: {
      initState: () => "current",
      deriveFromBase: (cell, fc) => {
        const flags = cell && cell.flags || [];
        const baseSpec = flags.filter(f => {
          const head = f.split(/[\s=]/)[0];
          return head === "--speculative-algorithm" || head === "--speculative-num-steps" || head === "--speculative-eagle-topk" || head === "--speculative-num-draft-tokens" || head === "--speculative-ngram-max-bfs-breadth";
        });
        if (baseSpec.length === 0) return "off";
        for (const opt of fc.options || []) {
          if (!opt.flags || opt.flags.length !== baseSpec.length) continue;
          const ok = opt.flags.every(pf => baseSpec.includes(pf));
          if (ok) return opt.id;
        }
        return "current";
      },
      revertHidden: (value, fc, base, h) => {
        if (value !== "current" && h.isHidden(fc.options || [], value, base)) {
          return "current";
        }
        return value;
      },
      apply: ({flags, env, value, fc, h, derived}) => {
        if (value === "current") return {
          flags,
          env
        };
        if (derived && value === derived) return {
          flags,
          env
        };
        const picked = (fc.options || []).find(p => p.id === value);
        if (picked && h.evaluateChip(picked, {
          dpAttnOn: h.hasFlag(flags, "--enable-dp-attention")
        }).disabled) {
          return {
            flags,
            env
          };
        }
        flags = h.stripFlagsByFirstToken(flags, ["--speculative-algorithm", "--speculative-num-steps", "--speculative-eagle-topk", "--speculative-num-draft-tokens", "--speculative-ngram-max-bfs-breadth"]);
        const preset = (fc.options || []).find(p => p.id === value);
        if (preset?.flags?.length) flags = h.insertBeforeTail(flags, preset.flags);
        return {
          flags,
          env
        };
      },
      render: ({axisId, value, setValue, fc, base, s, h, renderChip, derived}) => {
        const opts = fc.options || [];
        if (!opts.length) return null;
        const display = value !== "current" ? value : derived ? derived : "current";
        const hideCurrent = !!(derived && derived !== "current");
        const visible = opts.map(opt => h.evaluateChip(opt, base)).filter(c => !c.hidden && !(hideCurrent && c.value === "current"));
        if (visible.length === 0) return null;
        return <div key={axisId} style={s.card}>
            <div style={s.compactRow}>
              <span style={s.axisTitle}>Speculative</span>
              {visible.map(c => <span key={c.value} style={s.field}>
                  {renderChip(c.label, display, c.value, () => setValue(c.value), {
          disabled: c.disabled,
          disabledReason: c.disableReason
        })}
                </span>)}
            </div>
          </div>;
      }
    },
    // PD disaggregation axis: mode ("off" | "prefill" | "decode"), transfer backend and IB device.
    pdDisagg: {
      initState: () => ({
        mode: "off",
        transferBackend: "mooncake",
        ibDevice: "auto"
      }),
      // Resets mode / ibDevice to their defaults when the current choice is hidden under `base`;
      // returns the same object when nothing changed.
      revertHidden: (value, fc, base, h) => {
        let changed = false;
        const next = {
          ...value
        };
        if (next.mode !== "off" && fc.modes && h.isHidden(fc.modes, next.mode, base)) {
          next.mode = "off";
          changed = true;
        }
        if (next.ibDevice !== "auto" && fc.ibDevices && h.isHidden(fc.ibDevices, next.ibDevice, base)) {
          next.ibDevice = "auto";
          changed = true;
        }
        return changed ? next : value;
      },
      // Always strips the disaggregation flags; in prefill/decode mode re-adds them, pins the
      // serve port, and merges the transfer backend's env entries.
      apply: ({flags, env, value, sel, fc, h}) => {
        flags = h.stripFlagsByFirstToken(flags, ["--disaggregation-mode", "--disaggregation-transfer-backend", "--disaggregation-ib-device", "--disaggregation-bootstrap-port"]);
        const backends = fc.transferBackends || [];
        if (value.mode === "prefill" || value.mode === "decode") {
          const backend = value.transferBackend || "mooncake";
          // The hisparse handler looks for the exact text "--disaggregation-mode decode" produced here.
          const adds = [`--disaggregation-mode ${value.mode}`, `--disaggregation-transfer-backend ${backend}`];
          if (value.ibDevice && value.ibDevice !== "auto") {
            adds.push(`--disaggregation-ib-device ${value.ibDevice}`);
          }
          // Single-node selections get a loopback --dist-init-addr with a per-mode port, unless one is already present.
          if (sel.nodes === "single" && !flags.some(f => f.startsWith("--dist-init-addr"))) {
            adds.push(`--dist-init-addr 127.0.0.1:${PD_PORTS[value.mode].dist}`);
          }
          flags = h.insertBeforeTail(flags, adds);
          // Only an existing --port flag is rewritten; none is added when absent.
          const servePort = PD_PORTS[value.mode].serve;
          flags = flags.map(f => f.split(/[\s=]/)[0] === "--port" ? `--port ${servePort}` : f);
          const meta = backends.find(b => b.id === backend);
          if (meta && meta.env && meta.env.length) {
            // envWhen gates the env entries on the selection: every listed key must include sel[key].
            const gate = meta.envWhen;
            const ok = !gate || Object.keys(gate).every(k => (gate[k] || []).includes(sel[k]));
            if (ok) env = [...env, ...meta.env.filter(e => !env.includes(e))];
          }
        }
        return {
          flags,
          env
        };
      },
      // Exposes the active PD mode as a render hint; null when the mode is neither prefill nor decode.
      getRenderHints: value => {
        if (value.mode === "prefill" || value.mode === "decode") {
          return {
            pdMode: value.mode
          };
        }
        return null;
      },
      // Renders the Mode / Transfer Backend / IB Device selects that have options configured;
      // returns null when none do.
      render: ({axisId, value, setValue, fc, base, s, renderSelect}) => {
        const setSlot = (k, v) => setValue({
          ...value,
          [k]: v
        });
        const showModes = (fc.modes || []).length > 0;
        const showBackends = (fc.transferBackends || []).length > 0;
        const showIb = (fc.ibDevices || []).length > 0;
        if (!showModes && !showBackends && !showIb) return null;
        return <div key={axisId} style={s.card}>
            <div style={s.compactRow}>
              <span style={s.axisTitle}>PD Disagg</span>
              {showModes && <span style={s.field}>
                  <span style={s.fieldLabel}>Mode</span>
                  {renderSelect(value.mode, fc.modes, v => setSlot("mode", v), base)}
                </span>}
              {showBackends && <span style={s.field}>
                  <span style={s.fieldLabel}>Transfer Backend</span>
                  {renderSelect(value.transferBackend, fc.transferBackends, v => setSlot("transferBackend", v), base)}
                </span>}
              {showIb && <span style={s.field}>
                  <span style={s.fieldLabel}>IB Device</span>
                  {renderSelect(value.ibDevice, fc.ibDevices, v => setSlot("ibDevice", v), base)}
                </span>}
            </div>
          </div>;
      }
    },
    hisparse: {
      initState: fc => ({
        enable: false,
        hostRatio: fc && fc.defaultHostRatio || null
      }),
      revertHidden: (value, fc, base, h) => {
        if (value.hostRatio !== null && fc.hostRatios && h.isHidden(fc.hostRatios, value.hostRatio, base)) {
          return {
            ...value,
            hostRatio: fc && fc.defaultHostRatio || null
          };
        }
        return value;
      },
      apply: ({flags, env, value, fc, h}) => {
        const ownedHeads = ["--enable-hisparse", "--hisparse-config", ...(fc.requiredFlags || []).map(f => f.split(/\s/)[0])];
        flags = h.stripFlagsByFirstToken(flags, ownedHeads);
        const isDecode = flags.includes("--disaggregation-mode decode");
        if (value.enable && isDecode) {
          const ratio = value.hostRatio !== null && value.hostRatio !== undefined ? value.hostRatio : fc.defaultHostRatio || 10;
          const cfg = {
            ...fc.config || ({}),
            host_to_device_ratio: ratio
          };
          const adds = [...fc.requiredFlags || [], "--enable-hisparse", `--hisparse-config '${JSON.stringify(cfg)}'`];
          flags = h.insertBeforeTail(flags, adds);
        }
        return {
          flags,
          env
        };
      },
      render: ({axisId, value, setValue, fc, base, s, renderChip, renderSelect}) => {
        if (base.pdMode !== "decode") return null;
        const setSlot = (k, v) => setValue({
          ...value,
          [k]: v
        });
        const hasRatios = (fc.hostRatios || []).length > 0;
        return <div key={axisId} style={s.card}>
            <div style={s.compactRow}>
              <span style={s.axisTitle}>HiSparse</span>
              <span style={s.field}>
                {renderChip("Enable", value.enable, true, () => setSlot("enable", !value.enable))}
              </span>
              {hasRatios && <span style={s.field}>
                  <span style={s.fieldLabel}>Host ratio</span>
                  {renderSelect(value.hostRatio, fc.hostRatios, v => setSlot("hostRatio", v), base)}
                </span>}
            </div>
          </div>;
      }
    },
    hicache: {
      initState: () => ({
        enable: false,
        backend: null,
        writePolicy: "auto"
      }),
      revertHidden: (value, fc, base, h) => {
        let changed = false;
        const next = {
          ...value
        };
        if (next.backend !== null && fc.backends && h.isHidden(fc.backends, next.backend, base)) {
          next.backend = null;
          changed = true;
        }
        if (next.writePolicy !== "auto" && fc.writePolicies && h.isHidden(fc.writePolicies, next.writePolicy, base)) {
          next.writePolicy = "auto";
          changed = true;
        }
        return changed ? next : value;
      },
      apply: ({flags, env, value, fc, sel, h}) => {
        if (fc.excludesHw && sel && fc.excludesHw.includes(sel.hw)) return {
          flags,
          env
        };
        flags = h.stripFlagsByFirstToken(flags, ["--enable-hierarchical-cache", "--hicache-ratio", "--hicache-size", "--hicache-write-policy", "--hicache-mem-layout", "--hicache-io-backend", "--hicache-storage-backend", "--hicache-storage-prefetch-policy"]);
        if (value.enable) {
          const adds = ["--enable-hierarchical-cache", "--hicache-ratio 2", "--hicache-size 0"];
          if (value.backend) {
            adds.push("--hicache-mem-layout page_first_direct", "--hicache-io-backend direct");
          }
          const writePolicy = value.writePolicy && value.writePolicy !== "auto" ? value.writePolicy : "write_through";
          adds.push(`--hicache-write-policy ${writePolicy}`);
          if (value.backend) {
            adds.push(`--hicache-storage-backend ${value.backend}`, "--hicache-storage-prefetch-policy wait_complete");
          }
          flags = h.insertBeforeTail(flags, adds);
        }
        return {
          flags,
          env
        };
      },
      render: ({axisId, value, setValue, fc, base, s, renderChip, renderSelect}) => {
        if (fc.excludesHw && fc.excludesHw.includes(base.hw)) return null;
        const setSlot = (k, v) => setValue({
          ...value,
          [k]: v
        });
        const hasBackends = (fc.backends || []).length > 0;
        const hasPolicies = (fc.writePolicies || []).length > 0;
        return <div key={axisId} style={s.card}>
            <div style={s.compactRow}>
              <span style={s.axisTitle}>HiCache</span>
              <span style={s.field}>
                {renderChip("Enable", value.enable, true, () => setSlot("enable", !value.enable))}
              </span>
              {hasBackends && <span style={s.field}>
                  <span style={s.fieldLabel}>Storage</span>
                  {renderSelect(value.backend, fc.backends, v => setSlot("backend", v), base)}
                </span>}
              {hasPolicies && <span style={s.field}>
                  <span style={s.fieldLabel}>Write</span>
                  {renderSelect(value.writePolicy, fc.writePolicies, v => setSlot("writePolicy", v), base)}
                </span>}
            </div>
          </div>;
      }
    },
    flagSelects: {
      initState: fc => {
        const out = {};
        for (const spec of fc || []) out[spec.id] = null;
        return out;
      },
      deriveFromBase: (cell, fc) => {
        const flags = cell && cell.flags || [];
        const out = {};
        for (const spec of fc || []) {
          const prefixes = spec.stripPrefixes || [];
          const fam = flags.filter(f => prefixes.includes(f.split(/[\s=]/)[0]));
          let hit = null;
          for (const opt of spec.options || []) {
            const of = opt.flags || [];
            if (of.length === fam.length && of.every(x => fam.includes(x))) {
              hit = opt.id;
              break;
            }
          }
          out[spec.id] = hit;
        }
        return out;
      },
      revertHidden: (value, fc, base, h) => {
        let changed = false;
        const next = {
          ...value
        };
        for (const spec of fc || []) {
          const cur = next[spec.id];
          if (cur !== null && cur !== undefined && h.isHidden(spec.options, cur, base)) {
            next[spec.id] = null;
            changed = true;
          }
        }
        return changed ? next : value;
      },
      apply: ({flags, env, value, fc, sel, h, derived}) => {
        const evalBase = {
          ...sel || ({}),
          dpAttnOn: h.hasFlag(flags, "--enable-dp-attention"),
          pdMode: h.findFlagArg(flags, "--disaggregation-mode") || "off"
        };
        for (const spec of fc || []) {
          const v = value ? value[spec.id] : null;
          if (v === null || v === undefined) continue;
          const d = derived ? derived[spec.id] : null;
          if (v === d) continue;
          const opt = (spec.options || []).find(o => o.id === v);
          if (!opt) continue;
          if (h.evaluateChip(opt, evalBase).disabled) continue;
          flags = h.stripFlagsByFirstToken(flags, spec.stripPrefixes || []);
          if (opt.flags && opt.flags.length) {
            flags = h.insertBeforeTail(flags, opt.flags);
          }
        }
        return {
          flags,
          env
        };
      },
      // Renders one card per spec in `fc`, each holding the spec title and one chip
      // per visible option. Returns the array of cards, or null when no spec has a
      // visible option.
      render: ({axisId, value, setValue, fc, base, s, h, renderChip, derived}) => {
        const cards = [];
        for (const spec of fc || []) {
          // Evaluate every option against the constraint base and drop hidden ones.
          const opts = (spec.options || []).map(o => h.evaluateChip(o, base)).filter(c => !c.hidden);
          if (!opts.length) continue;
          // An explicit user selection wins; otherwise fall back to the value
          // derived from the base cell (if any) so the matching chip is highlighted.
          const explicit = value ? value[spec.id] : null;
          const display = explicit !== null && explicit !== undefined ? explicit : derived ? derived[spec.id] : null;
          // NOTE(review): the chip options below read `c.disableReason` while the
          // option key is `disabledReason` — confirm evaluateChip emits that spelling.
          cards.push(<div key={`${axisId}-${spec.id}`} style={s.card}>
              <div style={s.compactRow}>
                <span style={s.axisTitle}>{spec.title}</span>
                {opts.map(c => <span key={c.value} style={s.field}>
                    {renderChip(c.label, display, c.value, () => setValue({
            ...value,
            [spec.id]: c.value
          }), {
            disabled: c.disabled,
            disabledReason: c.disableReason
          })}
                  </span>)}
              </div>
            </div>);
        }
        return cards.length ? cards : null;
      }
    }
  };
  // Runs every axis handler's `apply` over copies of the base flags/env, in
  // AXIS_HANDLERS order. Axes without a feature config or without a delta value
  // are skipped. Also collects the PD mode from any handler exposing
  // `getRenderHints`. Returns `{flags, env, pdMode}`.
  const applyAllDeltas = (baseFlags, baseEnv, allDeltas, sel, derivedMap) => {
    const result = {
      flags: [...baseFlags],
      env: [...(baseEnv || [])],
      pdMode: null
    };
    Object.keys(AXIS_HANDLERS).forEach(axisId => {
      const handler = AXIS_HANDLERS[axisId];
      const fc = pgFeatures[axisId];
      if (!fc) return;
      const value = allDeltas[axisId];
      if (value === undefined) return;
      const applied = handler.apply({
        flags: result.flags,
        env: result.env,
        value,
        fc,
        sel,
        h: helpers,
        derived: derivedMap ? derivedMap[axisId] : null
      });
      result.flags = applied.flags;
      result.env = applied.env;
      if (!handler.getRenderHints) return;
      const hints = handler.getRenderHints(value, fc) || {};
      // A later handler only overrides the mode when it reports a truthy one.
      if (hints.pdMode) result.pdMode = hints.pdMode;
    });
    return result;
  };
  // Renders the final launch command text for a flag/env set.
  //
  //   cell      - accepted for interface compatibility; not read here
  //   flags     - list of flag strings ("--tp 8", "--port=30000", ...)
  //   cellEnv   - list of "KEY=value" env entries; may be null/undefined for
  //               cells that declare no env
  //   sel       - current selection (reads `hw` and `nodes`)
  //   envValues - placeholder values substituted by `interpolate`
  //   pdMode    - "prefill" / "decode" to add the PD banner and pick the dist port
  //   mode      - "python" (default) or "docker"
  //
  // Returns the command as a single multi-line string.
  const renderCommandLines = (cell, flags, cellEnv, sel, envValues, pdMode = null, mode = "python") => {
    const modelName = resolveModelName(sel);
    const nnodes = parseNnodes(sel.nodes);
    const multinode = nnodes > 1;
    // Cells may omit `env`; treat that as "no env entries" instead of crashing.
    const envList = cellEnv || [];
    const firstToken = x => x.split(/[\s=]/)[0];
    const f = [...flags];
    if (multinode && !f.some(x => x.startsWith("--nnodes"))) {
      // Insert the multi-node flags right after the first parallelism flag found
      // (in this priority order), else after --model-path.
      const PARALLELISM_ANCHORS = ["--enable-dp-attention", "--dp", "--tp"];
      let at = -1;
      for (const anchor of PARALLELISM_ANCHORS) {
        at = f.findIndex(x => firstToken(x) === anchor);
        if (at !== -1) break;
      }
      if (at === -1) at = f.findIndex(x => x.startsWith("--model-path"));
      const distPort = pdMode && PD_PORTS[pdMode] ? PD_PORTS[pdMode].dist : 20000;
      f.splice(at + 1, 0, `--nnodes ${nnodes}`, `--node-rank {{NODE_RANK}}`, `--dist-init-addr {{NODE0_IP}}:${distPort}`);
    }
    let cmd;
    if (mode === "docker") {
      const image = config.dockerImages && config.dockerImages[sel.hw] || "lmsysorg/sglang:dev";
      const portFlag = f.find(x => firstToken(x) === "--port");
      // Accept both "--port 30000" and "--port=30000": drop the separator
      // (whitespace or "=") before using the value in the -p mapping.
      const servePort = portFlag ? portFlag.slice(("--port").length).replace(/^[\s=]+/, "").trim() : "{{PORT}}";
      const dockerLines = ["docker run --gpus all", "  --shm-size 32g", multinode || pdMode ? "  --network host" : `  -p ${servePort}:${servePort}`, "  -v ~/.cache/huggingface:/root/.cache/huggingface", `  --env "HF_TOKEN={{HF_TOKEN}}"`, ...envList.map(e => `  --env ${e}`), "  --ipc=host", `  ${image}`, "  sglang serve", ...f.map(x => "    " + x)];
      cmd = dockerLines.join(" \\\n");
    } else {
      const flagBlock = f.map(x => "  " + x).join(" \\\n");
      const envBlock = envList.length ? envList.join(" \\\n") + " \\\n" : "";
      cmd = `${envBlock}sglang serve \\\n${flagBlock}`;
    }
    if (multinode && config.multiNodeHints && config.multiNodeHints[sel.hw]) {
      // Hardware-specific hints are emitted as shell comments above the command.
      const hint = config.multiNodeHints[sel.hw].map(line => line.length ? "# " + line : "#").join("\n");
      cmd = `${hint}\n${cmd}`;
    }
    cmd = interpolate(cmd, envValues, modelName);
    if (multinode) {
      // Added after interpolation, so this header text is never substituted.
      const header = `# Multi-node (${nnodes} nodes). Run the same command on every node with:\n` + `#   <node-rank> = 0 on the head node, 1..${nnodes - 1} on the others\n` + `#   <node0-ip>  = IP of the head node (reachable from all others)`;
      cmd = `${header}\n${cmd}`;
    }
    if (pdMode === "prefill" || pdMode === "decode") {
      const sibling = pdMode === "prefill" ? "decode" : "prefill";
      const routerCfg = config.playgroundFeatures && config.playgroundFeatures.pdDisagg && config.playgroundFeatures.pdDisagg.router;
      const routerPort = routerCfg && routerCfg.port || 8000;
      const routerLine = routerCfg ? `# then front BOTH with the Router (SGLang Model Gateway) shown below.\n` + `# Client traffic (cURL) targets the router (:${routerPort}), not this role server.` : `# then front BOTH with a router; client traffic targets the router, not this role server.`;
      const banner = `# === PD Disaggregation: ${pdMode.toUpperCase()} role ===\n` + `# Runs the ${pdMode} server. Also run the ${sibling} role on its peer host,\n` + routerLine;
      cmd = `${banner}\n${cmd}`;
    }
    return cmd;
  };
  // Line-level diff between two multi-line strings, based on a longest-common-
  // subsequence table. Returns an ordered list of `{line, kind}` entries where
  // `kind` is "unchanged", "added" (only in pgStr) or "removed" (only in baseStr).
  const computeDiff = (baseStr, pgStr) => {
    const before = baseStr.split("\n");
    const after = pgStr.split("\n");
    const rows = before.length;
    const cols = after.length;

    // lcs[r][c] = LCS length of the first r lines of `before` and first c of `after`.
    const lcs = Array.from({length: rows + 1}, () => new Array(cols + 1).fill(0));
    for (let row = 1; row <= rows; row += 1) {
      for (let col = 1; col <= cols; col += 1) {
        lcs[row][col] = before[row - 1] === after[col - 1]
          ? lcs[row - 1][col - 1] + 1
          : Math.max(lcs[row - 1][col], lcs[row][col - 1]);
      }
    }

    // Walk back from the bottom-right corner, collecting entries last-to-first.
    const reversed = [];
    let r = rows;
    let c = cols;
    while (r > 0 || c > 0) {
      if (r > 0 && c > 0 && before[r - 1] === after[c - 1]) {
        reversed.push({line: before[r - 1], kind: "unchanged"});
        r -= 1;
        c -= 1;
        continue;
      }
      // On ties prefer consuming from `after`, so an "added" line is emitted
      // after the "removed" line it replaces once the list is reversed.
      const takeAdded = c > 0 && (r === 0 || lcs[r][c - 1] >= lcs[r - 1][c]);
      if (takeAdded) {
        reversed.push({line: after[c - 1], kind: "added"});
        c -= 1;
      } else {
        reversed.push({line: before[r - 1], kind: "removed"});
        r -= 1;
      }
    }
    return reversed.reverse();
  };
  // Formats a selection plus env/flag lists as the text of a cookbook cell entry
  // (`{ match: {...}, verified: true, env: [...], flags: [...] },`), indented for
  // pasting into the cells array.
  const serializeCell = (sel, env, flags) => {
    const quote = v => JSON.stringify(v);
    const matchBody = ["hw", "variant", "quant", "strategy", "nodes"]
      .map(dim => `${dim}: ${quote(sel[dim])}`)
      .join(", ");
    // Empty or missing lists collapse to "[]"; otherwise one quoted item per line.
    const renderList = items => {
      if (!items || items.length === 0) return "[]";
      const rows = items.map(item => `        ${quote(item)},`);
      return ["[", ...rows, "      ]"].join("\n");
    };
    const envText = renderList(env);
    const flagsText = renderList(flags);
    return `    {\n      match: { ${matchBody} },\n      verified: true,\n      env: ${envText},\n      flags: ${flagsText},\n    },`;
  };
  // Builds the pre-filled "new issue" URL used to submit a verified cell.
  // Repository coordinates and template come from `config.github`, each with a
  // built-in fallback; free-text fields missing from `fields` become "".
  const buildSubmitUrl = (sel, fields) => {
    const github = config.github || {};
    const setting = (key, fallback) => github[key] || fallback;
    const owner = setting("owner", "sgl-project");
    const repo = setting("repo", "sglang");
    const combo = `${sel.hw} / ${sel.variant} / ${sel.quant} / ${sel.strategy} / ${sel.nodes}`;

    // Parameter order is kept stable; names correspond to the issue form fields.
    const pairs = [
      ["template", setting("issueTemplate", "3-playground-verified-cell.yml")],
      ["title", `[Playground] Verified cell: ${combo}`],
      ["model", setting("cookbookModel", "deepseek-ai/deepseek-v4")],
      ["combination", combo],
      ["cell-snippet", fields.cellSnippet || ""],
      ["existing-cell", fields.existingCell || ""],
      ["sglang-version", fields.sglangVersion || ""],
      ["bench-result", fields.benchResult || ""],
      ["notes", fields.notes || ""]
    ];
    const query = new URLSearchParams();
    for (const [name, text] of pairs) query.append(name, text);
    return `https://github.com/${owner}/${repo}/issues/new?${query.toString()}`;
  };
  // Builds the inline-style table for the playground for the given theme.
  // Most entries are plain style objects; `badge`, `badgeDot`, `runModeChip`
  // and `runModeChipLast` are functions of a boolean that return style objects.
  const makeStyles = isDark => {
    // Picks the dark or light variant of a themed value.
    const tone = (dark, light) => (isDark ? dark : light);

    // Palette entries shared by several rules below.
    const hairline = tone("#374151", "#e5e7eb");
    const controlBorder = tone("#4b5563", "#d1d5db");
    const accent = tone("#FDBA74", "#FB923C");
    const accentText = tone("#FDBA74", "#C2410C");
    const surface = tone("#1f2937", "#fff");
    const inputBg = tone("#111827", "#fff");
    const strongText = tone("#e5e7eb", "#111827");
    const bodyText = tone("#e5e7eb", "#374151");
    const softText = tone("#e5e7eb", "inherit");
    const muted = tone("#9ca3af", "#6b7280");
    const okBg = tone("#064e3b", "#d1fae5");
    const okFg = tone("#a7f3d0", "#065f46");
    const warnBg = tone("#78350f", "#fef3c7");
    const warnFg = tone("#fde68a", "#92400e");
    const mono = "'Menlo', 'Monaco', 'Courier New', monospace";

    // Common part of the two run-mode toggle segments.
    const runModeSegment = active => ({
      padding: "2px 10px",
      cursor: "pointer",
      background: active ? surface : "transparent",
      color: active ? strongText : muted
    });

    return {
      container: {
        maxWidth: "900px",
        margin: "0 auto",
        display: "flex",
        flexDirection: "column",
        gap: "6px"
      },
      card: {
        padding: "6px 10px",
        border: `1px solid ${hairline}`,
        borderLeft: `3px solid ${accent}`,
        borderRadius: "4px",
        background: surface
      },
      cardStack: {
        display: "flex",
        flexDirection: "column",
        gap: "6px"
      },
      baseStrip: {
        padding: "8px 12px",
        borderRadius: "4px",
        background: okBg,
        color: okFg,
        fontSize: "12px",
        display: "flex",
        alignItems: "center",
        gap: "10px"
      },
      title: {
        fontSize: "13px",
        fontWeight: "600",
        color: softText,
        marginBottom: "8px"
      },
      compactRow: {
        display: "flex",
        flexWrap: "wrap",
        alignItems: "center",
        gap: "10px",
        rowGap: "4px"
      },
      axisTitle: {
        fontSize: "12px",
        fontWeight: 700,
        color: accentText,
        letterSpacing: "0.02em",
        minWidth: "100px",
        flexShrink: 0
      },
      field: {
        display: "inline-flex",
        alignItems: "center",
        gap: "4px"
      },
      fieldLabel: {
        fontSize: "11px",
        fontWeight: 500,
        color: muted
      },
      select: {
        padding: "2px 6px",
        border: `1px solid ${controlBorder}`,
        borderRadius: "3px",
        fontSize: "12px",
        background: inputBg,
        color: strongText,
        cursor: "pointer",
        lineHeight: "1.4"
      },
      rowFlex: {
        display: "flex",
        flexWrap: "wrap",
        gap: "6px",
        alignItems: "center",
        flex: 1
      },
      subRow: {
        display: "flex",
        alignItems: "center",
        gap: "10px"
      },
      subLabel: {
        fontSize: "11px",
        fontWeight: 600,
        color: muted,
        minWidth: "96px",
        flexShrink: 0,
        letterSpacing: "0.02em"
      },
      chipRow: {
        display: "flex",
        flexWrap: "wrap",
        gap: "6px",
        flex: 1
      },
      chip: {
        padding: "3px 9px",
        border: `1px solid ${tone("#9ca3af", "#d1d5db")}`,
        borderRadius: "3px",
        cursor: "pointer",
        fontSize: "12px",
        userSelect: "none",
        background: tone("#374151", "#fff"),
        color: softText,
        textAlign: "center"
      },
      chipChecked: {
        background: "#D45D44",
        color: "white",
        borderColor: "#D45D44"
      },
      chipDisabled: {
        cursor: "not-allowed",
        opacity: 0.4
      },
      commandWrap: {
        position: "relative",
        background: tone("#111827", "#f5f5f5"),
        borderRadius: "6px",
        border: `1px solid ${hairline}`,
        overflow: "hidden"
      },
      commandHeader: {
        display: "flex",
        flexWrap: "wrap",
        justifyContent: "space-between",
        alignItems: "center",
        gap: "6px 10px",
        padding: "6px 10px",
        borderBottom: `1px solid ${hairline}`,
        background: tone("#1f2937", "#fafafa")
      },
      commandPre: {
        padding: "12px 16px",
        fontFamily: mono,
        fontSize: "12px",
        lineHeight: "1.5",
        color: bodyText,
        whiteSpace: "pre-wrap",
        overflowX: "auto",
        margin: 0
      },
      mtpWarn: {
        margin: "8px 0 0",
        padding: "8px 12px",
        borderRadius: "8px",
        fontSize: "12px",
        lineHeight: "1.45",
        background: warnBg,
        color: warnFg,
        border: `1px solid ${tone("#92400e", "#fcd34d")}`
      },
      diffLineUnchanged: {
        display: "block"
      },
      diffLineAdded: {
        display: "block",
        background: tone("rgba(16,185,129,0.15)", "rgba(16,185,129,0.18)"),
        color: okFg,
        borderLeft: "3px solid #10b981",
        paddingLeft: "8px",
        marginLeft: "-8px"
      },
      diffLineRemoved: {
        display: "block",
        // Same tint in both themes.
        background: "rgba(239,68,68,0.10)",
        color: tone("#fca5a5", "#991b1b"),
        textDecoration: "line-through",
        opacity: 0.7,
        borderLeft: "3px solid #ef4444",
        paddingLeft: "8px",
        marginLeft: "-8px"
      },
      badge: verified => ({
        display: "inline-flex",
        alignItems: "center",
        gap: "6px",
        padding: "2px 8px",
        borderRadius: "10px",
        background: verified ? okBg : warnBg,
        color: verified ? okFg : warnFg,
        fontSize: "11px",
        fontWeight: 600
      }),
      badgeDot: verified => ({
        width: "8px",
        height: "8px",
        borderRadius: "50%",
        background: verified ? "#10b981" : "#f59e0b"
      }),
      iconButton: {
        padding: "4px 10px",
        border: `1px solid ${controlBorder}`,
        borderRadius: "4px",
        background: surface,
        color: bodyText,
        fontSize: "11px",
        fontWeight: 500,
        cursor: "pointer",
        display: "inline-flex",
        alignItems: "center",
        gap: "4px"
      },
      iconRow: {
        display: "inline-flex",
        flexWrap: "wrap",
        gap: "6px"
      },
      runModeWrap: {
        display: "inline-flex",
        border: `1px solid ${controlBorder}`,
        borderRadius: "10px",
        overflow: "hidden",
        fontSize: "11px",
        fontWeight: 600,
        userSelect: "none"
      },
      // Non-final segment: carries the divider on its right edge.
      runModeChip: active => ({
        ...runModeSegment(active),
        borderRight: `1px solid ${controlBorder}`
      }),
      runModeChipLast: active => runModeSegment(active),
      headerLeft: {
        display: "inline-flex",
        flexWrap: "wrap",
        alignItems: "center",
        gap: "8px"
      },
      dialog: {
        background: surface,
        color: strongText,
        borderRadius: "8px",
        padding: "20px",
        maxWidth: "720px",
        width: "92%",
        maxHeight: "calc(100vh - 80px)",
        overflowY: "auto",
        border: `1px solid ${hairline}`,
        boxShadow: "0 10px 25px rgba(0,0,0,0.25)",
        margin: "auto"
      },
      modalHeader: {
        display: "flex",
        justifyContent: "space-between",
        alignItems: "center",
        marginBottom: "12px"
      },
      modalTitle: {
        fontSize: "15px",
        fontWeight: 600
      },
      modalCloseBtn: {
        background: "transparent",
        border: "none",
        color: "inherit",
        fontSize: "20px",
        cursor: "pointer",
        padding: "0 6px",
        lineHeight: 1
      },
      formField: {
        display: "flex",
        flexDirection: "column",
        gap: "4px",
        marginBottom: "10px"
      },
      formLabel: {
        fontSize: "12px",
        fontWeight: 500,
        color: tone("#9ca3af", "#4b5563")
      },
      formInput: {
        padding: "6px 10px",
        fontSize: "13px",
        border: `1px solid ${controlBorder}`,
        borderRadius: "4px",
        background: inputBg,
        color: strongText,
        fontFamily: mono
      },
      sectionHeading: {
        fontSize: "12px",
        fontWeight: 600,
        textTransform: "uppercase",
        letterSpacing: "0.04em",
        color: muted,
        margin: "12px 0 6px 0"
      },
      primaryBtn: {
        padding: "6px 14px",
        background: accent,
        color: tone("#7C2D12", "white"),
        border: "none",
        borderRadius: "4px",
        cursor: "pointer",
        fontSize: "13px",
        fontWeight: 500
      },
      resetBtn: {
        marginLeft: "auto",
        padding: "2px 8px",
        fontSize: "11px",
        border: `1px solid ${controlBorder}`,
        borderRadius: "3px",
        background: "transparent",
        color: muted,
        cursor: "pointer"
      },
      switchBaseBtn: {
        padding: "2px 8px",
        fontSize: "11px",
        fontWeight: 600,
        border: `1px solid ${accent}`,
        borderRadius: "3px",
        background: "transparent",
        color: accentText,
        cursor: "pointer"
      },
      matchedHint: {
        fontSize: "11px",
        color: muted,
        marginLeft: "8px",
        display: "inline-flex",
        alignItems: "center",
        gap: "4px"
      },
      matchedSwitchBtn: {
        marginLeft: "4px",
        background: "transparent",
        border: "none",
        padding: 0,
        color: accentText,
        cursor: "pointer",
        fontSize: "11px",
        fontWeight: 600,
        textDecoration: "underline",
        textUnderlineOffset: "2px"
      }
    };
  };
  // Theme tracking: `isDark` mirrors whether the <html> element currently
  // signals a dark theme.
  const [isDark, setIsDark] = useState(false);
  useEffect(() => {
    // Dark when <html> has class "dark", data-theme="dark", or an inline
    // color-scheme of "dark".
    const check = () => {
      const html = document.documentElement;
      setIsDark(html.classList.contains("dark") || html.getAttribute("data-theme") === "dark" || html.style.colorScheme === "dark");
    };
    check();
    // Re-check whenever one of those three attributes changes on <html>.
    const observer = new MutationObserver(check);
    observer.observe(document.documentElement, {
      attributes: true,
      attributeFilter: ["class", "data-theme", "style"]
    });
    return () => observer.disconnect();
  }, []);
  // Placeholder values used when interpolating commands; starts from the
  // defaults produced by placeholderDefaults(config.placeholders).
  const [env, setEnv] = useState(() => placeholderDefaults(config.placeholders));
  useEffect(() => {
    // After mount, overlay any values saved in localStorage under STORAGE_KEY on
    // top of the defaults. Read/parse failures are deliberately ignored.
    try {
      const raw = window.localStorage.getItem(STORAGE_KEY);
      if (raw) {
        const parsed = JSON.parse(raw);
        setEnv({
          ...placeholderDefaults(config.placeholders),
          ...parsed
        });
      }
    } catch {}
  }, []);
  // Updates the env state and persists it; a failing localStorage write is
  // ignored, so the in-memory state still updates.
  const saveEnv = next => {
    setEnv(next);
    try {
      window.localStorage.setItem(STORAGE_KEY, JSON.stringify(next));
    } catch {}
  };
  // Computes the base selection from the URL hash.
  // Starts from a copy of the first cell's `match` object and overrides only the
  // dimensions that object itself declares, using `key=value` pairs parsed from
  // `location.hash`. Without a `window` or with an empty hash the copy is
  // returned unchanged.
  const initialBaseFromHash = () => {
    const fallback = config.cells[0].match;
    const out = {
      ...fallback
    };
    if (typeof window === "undefined") return out;
    const raw = window.location.hash.replace(/^#/, "");
    if (!raw) return out;
    new URLSearchParams(raw).forEach((value, key) => {
      // Own-property check: `key in out` would also accept inherited names such
      // as "constructor" or "toString" and add them to the selection.
      if (Object.prototype.hasOwnProperty.call(out, key)) out[key] = value;
    });
    return out;
  };
  // Base selection inherited from the Deployment panel, seeded from the URL hash.
  const [base, setBase] = useState(() => initialBaseFromHash());
  useEffect(() => {
    // Hash navigation: recompute the base from the new hash.
    const onHash = () => setBase(initialBaseFromHash());
    // "sglang-deploy-sel" custom event: start from the first cell's match and
    // take every key of that object for which `e.detail` provides a defined value.
    const onSelEvent = e => {
      const fallback = config.cells[0].match;
      const incoming = e && e.detail || ({});
      const next = {
        ...fallback
      };
      for (const k of Object.keys(next)) {
        if (incoming[k] !== undefined) next[k] = incoming[k];
      }
      setBase(next);
    };
    window.addEventListener("hashchange", onHash);
    window.addEventListener("sglang-deploy-sel", onSelEvent);
    // Remove both listeners on unmount.
    return () => {
      window.removeEventListener("hashchange", onHash);
      window.removeEventListener("sglang-deploy-sel", onSelEvent);
    };
  }, []);
  // Builds the pristine override state: one entry per axis that has a feature
  // config in `pgFeatures`, seeded by that handler's `initState`.
  const initialDeltas = () => {
    const seeded = {};
    Object.keys(AXIS_HANDLERS).forEach(axisId => {
      const fc = pgFeatures[axisId];
      if (!fc) return;
      seeded[axisId] = AXIS_HANDLERS[axisId].initState(fc);
    });
    return seeded;
  };
  // Per-axis user overrides on top of the base cell.
  const [deltas, setDeltas] = useState(initialDeltas);
  useEffect(() => {
    // When any base dimension changes, let each axis handler clear selections
    // that are hidden for the new base (evaluated against `base` itself).
    setDeltas(d => {
      let next = d;
      let mutated = false;
      for (const [axisId, handler] of Object.entries(AXIS_HANDLERS)) {
        const fc = pgFeatures[axisId];
        if (!fc || d[axisId] === undefined) continue;
        const nv = handler.revertHidden(d[axisId], fc, base, helpers);
        // A handler reports "changed" by returning a different value than it got.
        if (nv !== d[axisId]) {
          // Copy the state object lazily, on the first change only.
          if (!mutated) {
            next = {
              ...d
            };
            mutated = true;
          }
          next[axisId] = nv;
        }
      }
      // Returning the same object when nothing changed avoids a state update.
      return mutated ? next : d;
    });
  }, [base.hw, base.variant, base.quant, base.strategy, base.nodes]);
  // Which dialog is open: null, or one of the ids used below ("curl", "env", "submit").
  const [modal, setModal] = useState(null);
  // Ref callback for <dialog>: opens it modally once mounted. Errors from
  // showModal() are ignored.
  const openDialog = el => {
    if (el && !el.open) {
      try {
        el.showModal();
      } catch {}
    }
  };
  // Closes the dialog on a click that targets the <dialog> element itself AND
  // lands outside its bounding rectangle; clicks on child elements are ignored.
  const onDialogClick = e => {
    if (e.target !== e.currentTarget) return;
    const r = e.currentTarget.getBoundingClientRect();
    const {clientX: x, clientY: y} = e;
    if (x < r.left || x > r.right || y < r.top || y > r.bottom) setModal(null);
  };
  useEffect(() => {
    // Inject the ::backdrop rule once per document. If an element with this id
    // already exists, nothing is added and no cleanup is registered.
    const ID = "__playground_dialog_backdrop";
    if (document.getElementById(ID)) return undefined;
    const style = document.createElement("style");
    style.id = ID;
    style.textContent = `dialog::backdrop { background: rgba(0, 0, 0, 0.5); }`;
    document.head.appendChild(style);
    // Remove the injected <style> on unmount.
    return () => {
      const el = document.getElementById(ID);
      if (el) el.remove();
    };
  }, []);
  // Transient "Copied" indicators for the three copy buttons.
  const [copied, setCopied] = useState(false);
  const [curlCopied, setCurlCopied] = useState(false);
  const [routerCopied, setRouterCopied] = useState(false);
  // Editable draft of `env`; re-synced from `env` while the "env" modal is the
  // active one (the effect runs when `modal` or `env` changes).
  const [envDraft, setEnvDraft] = useState(env);
  useEffect(() => {
    if (modal === "env") setEnvDraft(env);
  }, [modal, env]);
  // Output format of the rendered command: "python" or "docker".
  const [runMode, setRunMode] = useState("python");
  // Free-text fields of the submit form.
  const [submitDraft, setSubmitDraft] = useState({
    sglangVersion: "",
    benchResult: "",
    notes: ""
  });
  // Attestation checkboxes of the submit form.
  const [submitAttest, setSubmitAttest] = useState({
    ranCommand: false,
    reachedReady: false,
    outputCorrect: false
  });
  useEffect(() => {
    // Clear the draft and the attestations when `modal` becomes "submit".
    if (modal === "submit") {
      setSubmitDraft({
        sglangVersion: "",
        benchResult: "",
        notes: ""
      });
      setSubmitAttest({
        ranCommand: false,
        reachedReady: false,
        outputCorrect: false
      });
    }
  }, [modal]);
  // ---- Values derived on every render ----
  const s = makeStyles(isDark);
  // Cell matching the base selection on every dimension (undefined when none does).
  const baseCell = findCell(config.cells, base);
  const modelName = resolveModelName(base);
  // Per-axis values implied by the base cell, for handlers that implement
  // `deriveFromBase`; stays empty when there is no base cell.
  const derivedMap = {};
  if (baseCell) {
    for (const [axisId, handler] of Object.entries(AXIS_HANDLERS)) {
      const fc = pgFeatures[axisId];
      if (!fc || !handler.deriveFromBase) continue;
      derivedMap[axisId] = handler.deriveFromBase(baseCell, fc, helpers);
    }
  }
  // Effective DP-attention setting: an explicit override wins, otherwise the
  // value derived from the base cell, otherwise null.
  const attnDelta = deltas.attention || ({});
  const attnDerived = derivedMap.attention || ({});
  const effDpAttn = attnDelta.dpAttn !== null && attnDelta.dpAttn !== undefined ? attnDelta.dpAttn : attnDerived.dpAttn !== undefined ? attnDerived.dpAttn : null;
  // Counts as "on" for `true` or any positive number.
  const dpAttnOn = effDpAttn === true || typeof effDpAttn === "number" && effDpAttn > 0;
  const pdMode = deltas.pdDisagg && deltas.pdDisagg.mode || "off";
  // Base selection extended with the two derived switches; this is the object
  // handed to the axis handlers' `render` as `base`.
  const constraintBase = {
    ...base,
    dpAttnOn,
    pdMode
  };
  // Rendered commands and their diff; all stay empty when no base cell matches.
  let baseCommand = "";
  let playgroundCommand = "";
  let diffLines = [];
  let pgFlagsLatest = [];
  let pgEnvLatest = [];
  if (baseCell) {
    // Base command: the cell's own flags/env, rendered without a PD mode.
    baseCommand = renderCommandLines(baseCell, baseCell.flags, baseCell.env, base, env, null, runMode);
    // NOTE: this `pdMode` (from applyAllDeltas) shadows the outer `pdMode`
    // inside this block only.
    const {flags: pgFlags, env: pgEnv, pdMode} = applyAllDeltas(baseCell.flags, baseCell.env, deltas, base, derivedMap);
    pgFlagsLatest = pgFlags;
    pgEnvLatest = pgEnv;
    playgroundCommand = renderCommandLines(baseCell, pgFlags, pgEnv, base, env, pdMode, runMode);
    diffLines = computeDiff(baseCommand, playgroundCommand);
  }
  // Look for a cell whose flags/env equal the playground result; the badge is
  // "Verified" only when such a cell exists and is marked verified.
  const matchedCell = baseCell ? findMatchingCell(config.cells, base, pgEnvLatest, pgFlagsLatest) : null;
  const playgroundVerified = !!(matchedCell && matchedCell.verified);
  // Set only when the match is a different cell than the base cell.
  const matchedSiblingCell = matchedCell && matchedCell !== baseCell ? matchedCell : null;
  // True when a --speculative-algorithm flag is present but no
  // --max-running-requests flag is; drives the MTP warning.
  const pgMtpHint = pgFlagsLatest.some(f => f.split(/[\s=]/)[0] === "--speculative-algorithm") && !pgFlagsLatest.some(f => f.split(/[\s=]/)[0] === "--max-running-requests");
  // Cell snippets and the pre-filled issue URL for the submit dialog.
  const proposedCellSnippet = baseCell ? serializeCell(base, pgEnvLatest, pgFlagsLatest) : "";
  const existingCellSnippet = baseCell ? serializeCell(base, baseCell.env || [], baseCell.flags) : "";
  const submitUrl = baseCell ? buildSubmitUrl(base, {
    cellSnippet: proposedCellSnippet,
    existingCell: existingCellSnippet,
    sglangVersion: submitDraft.sglangVersion,
    benchResult: submitDraft.benchResult,
    notes: submitDraft.notes
  }) : "";
  // Requires all three attestations plus a non-blank SGLang version.
  const submitReady = submitAttest.ranCommand && submitAttest.reachedReady && submitAttest.outputCorrect && submitDraft.sglangVersion.trim().length > 0;
  // Router config from playgroundFeatures.pdDisagg.router, but only while a PD
  // mode other than "off" is selected; otherwise null.
  const pdRouter = pdMode !== "off" && config.playgroundFeatures && config.playgroundFeatures.pdDisagg && config.playgroundFeatures.pdDisagg.router || null;
  // With a router that declares a port, the cURL example uses that port as
  // CURL_PORT instead of the user's env value.
  const curlEnv = pdRouter && pdRouter.port != null ? {
    ...env,
    CURL_PORT: String(pdRouter.port)
  } : env;
  const curlText = interpolate(config.curl || "", curlEnv, modelName);
  // Router launch command with the role serve ports and the router port filled
  // in; empty when there is no router command.
  const routerText = pdRouter && pdRouter.command ? interpolate(pdRouter.command, {
    ...env,
    PREFILL_PORT: PD_PORTS.prefill.serve,
    DECODE_PORT: PD_PORTS.decode.serve,
    ROUTER_PORT: pdRouter.port
  }, modelName) : "";
  // Drops every override by restoring the initial per-axis state.
  const resetAll = () => setDeltas(initialDeltas());
  // Placeholders grouped by their `target`; "command" and "curl" buckets always
  // exist, any other target gets a bucket on first use.
  const placeholderGroups = (() => {
    const out = {
      command: [],
      curl: []
    };
    for (const [key, meta] of Object.entries(config.placeholders || ({}))) {
      (out[meta.target] || (out[meta.target] = [])).push({
        key,
        ...meta
      });
    }
    return out;
  })();
  // Copies the playground command to the clipboard and, once the write has
  // succeeded, shows the "Copied" state for 1.2 s. If the Clipboard API is
  // unavailable or the write is rejected, the button label stays unchanged and
  // a warning is logged instead of throwing or leaving an unhandled rejection.
  const handleCopy = () => {
    const clipboard = typeof navigator !== "undefined" ? navigator.clipboard : undefined;
    if (!clipboard || typeof clipboard.writeText !== "function") {
      console.warn("Playground: Clipboard API unavailable; command was not copied");
      return;
    }
    clipboard.writeText(playgroundCommand).then(() => {
      setCopied(true);
      setTimeout(() => setCopied(false), 1200);
    }).catch(err => {
      console.warn("Playground: copying the command failed", err);
    });
  };
  // Copies the cURL example to the clipboard and, once the write has succeeded,
  // shows the "Copied" state for 1.2 s. If the Clipboard API is unavailable or
  // the write is rejected, the button label stays unchanged and a warning is
  // logged instead of throwing or leaving an unhandled rejection.
  const copyCurl = () => {
    const clipboard = typeof navigator !== "undefined" ? navigator.clipboard : undefined;
    if (!clipboard || typeof clipboard.writeText !== "function") {
      console.warn("Playground: Clipboard API unavailable; cURL example was not copied");
      return;
    }
    clipboard.writeText(curlText).then(() => {
      setCurlCopied(true);
      setTimeout(() => setCurlCopied(false), 1200);
    }).catch(err => {
      console.warn("Playground: copying the cURL example failed", err);
    });
  };
  // One-line label for the inherited base (hw and quant upper-cased), or an
  // explanatory placeholder when no cell matches the current selection.
  const baseSummary = baseCell ? `${base.hw.toUpperCase()} · ${base.variant} · ${base.quant.toUpperCase()} · ${base.strategy} · ${base.nodes}` : "(no verified cell at the current Deploy selection — showing playground only)";
  // Renders one selectable chip.
  //   label   - chip text
  //   current - currently displayed value; the chip is highlighted when it
  //             strictly equals `value`
  //   value   - value passed to `onPick` when the chip is clicked
  //   opts    - `disabled` blocks the click and applies the disabled style;
  //             `disabledReason` becomes the tooltip (default "Not available")
  const renderChip = (label, current, value, onPick, opts = {}) => {
    const checked = current === value;
    const disabled = !!opts.disabled;
    // A null value is keyed as "auto".
    return <span key={`${label}-${value === null ? "auto" : value}`} style={{
      ...s.chip,
      ...checked ? s.chipChecked : {},
      ...disabled ? s.chipDisabled : {}
    }} title={disabled ? opts.disabledReason || "Not available" : ""} onClick={() => {
      if (!disabled) onPick(value);
    }}>
        {label}
      </span>;
  };
  // Renders a <select> over `entries` after evaluating each one against `base`.
  //   current  - currently selected value
  //   entries  - raw option entries, passed through helpers.evaluateChip
  //   onPick   - called with the chosen option's value
  //   base     - constraint context for evaluateChip (shadows the component's
  //              `base` state inside this function)
  //   labelFor - optional function producing the label for an evaluated entry
  //   opts     - `hideValues`: values to leave out of the list
  const renderSelect = (current, entries, onPick, base, labelFor, opts = {}) => {
    const hideSet = new Set(opts.hideValues || []);
    const items = [];
    for (const entry of entries || []) {
      const c = helpers.evaluateChip(entry, base);
      if (c.hidden) continue;
      if (hideSet.has(c.value)) continue;
      // Label priority: labelFor(c), then c.label, then "Auto" for a null value,
      // then the stringified value.
      const lbl = labelFor ? labelFor(c) : c.label !== undefined ? c.label : c.value === null ? "Auto" : String(c.value);
      items.push({
        ...c,
        label: lbl
      });
    }
    // Options are addressed by index, so values need not be strings. When the
    // current value is not in the list, the first item is shown as selected.
    let idx = items.findIndex(c => c.value === current);
    if (idx === -1) idx = 0;
    return <select style={s.select} value={idx} onChange={e => {
      const next = items[parseInt(e.target.value, 10)];
      // Disabled items are never reported to onPick.
      if (next && !next.disabled) onPick(next.value);
    }}>
        {items.map((c, i) => <option key={i} value={i} disabled={c.disabled}>
            {c.label}{c.disabled ? " (n/a)" : ""}
          </option>)}
      </select>;
  };
  return <div style={s.container} className="not-prose">
      {}
      <div style={s.baseStrip}>
        <span style={{
    fontWeight: 600
  }}>Inherited base from Deployment:</span>
        <code style={{
    fontFamily: "Menlo, monospace"
  }}>{baseSummary}</code>
        {}
        <button type="button" style={s.switchBaseBtn} onClick={() => {
    const el = document.getElementById("deployment") || document.getElementById("deploy");
    if (el) el.scrollIntoView({
      behavior: "smooth",
      block: "start"
    });
  }}>
          ↑ Switch base
        </button>
        <button style={s.resetBtn} onClick={resetAll}>Reset all overrides</button>
      </div>

      {}
      {Object.entries(AXIS_HANDLERS).map(([axisId, handler]) => {
    const fc = pgFeatures[axisId];
    if (!fc) return null;
    const setValue = next => setDeltas(d => ({
      ...d,
      [axisId]: next
    }));
    return handler.render({
      axisId,
      value: deltas[axisId],
      setValue,
      fc,
      base: constraintBase,
      s,
      h: helpers,
      renderChip,
      renderSelect,
      derived: derivedMap[axisId] || null
    });
  })}

      {}
      <div style={s.card}>
        <div style={s.title}>Playground Command (compare with base)</div>
        <div style={s.commandWrap}>
          <div style={s.commandHeader}>
            <div style={s.headerLeft}>
              <div style={s.badge(playgroundVerified)}>
                <span style={s.badgeDot(playgroundVerified)} />
                {playgroundVerified ? "Verified" : "Not Verified"}
              </div>
              {}
              {matchedSiblingCell && <span style={s.matchedHint}>
                  matches <code style={{
    fontFamily: "Menlo, monospace"
  }}>
                    {matchedSiblingCell.match.strategy}
                  </code>
                  <button type="button" style={s.matchedSwitchBtn} onClick={() => {
    const m = matchedSiblingCell.match;
    setDeltas(initialDeltas());
    const hash = new URLSearchParams(m).toString();
    window.location.hash = hash;
    window.dispatchEvent(new CustomEvent("sglang-deploy-sel", {
      detail: m
    }));
  }}>
                    switch base →
                  </button>
                </span>}
              <div style={s.runModeWrap} role="tablist" aria-label="Output format">
                <span style={s.runModeChip(runMode === "python")} onClick={() => setRunMode("python")} role="tab" aria-selected={runMode === "python"}>
                  Python
                </span>
                <span style={s.runModeChipLast(runMode === "docker")} onClick={() => setRunMode("docker")} role="tab" aria-selected={runMode === "docker"}>
                  Docker
                </span>
              </div>
            </div>
            <div style={s.iconRow}>
              <button style={s.iconButton} onClick={handleCopy}>
                {copied ? "✓ Copied" : "⧉ Copy"}
              </button>
              <button style={s.iconButton} onClick={() => setModal("curl")}>$ cURL</button>
              <button style={s.iconButton} onClick={() => setModal("env")}>⚙ Env</button>
              {}
              {!playgroundVerified && baseCell && <button style={{
    ...s.iconButton,
    borderColor: isDark ? "#FDBA74" : "#FB923C",
    color: isDark ? "#FDBA74" : "#C2410C",
    fontWeight: 600
  }} onClick={() => setModal("submit")} title="I verified this command on my hardware — open a pre-filled GitHub issue to land it as a cookbook cell.">
                  Submit ↗
                </button>}
            </div>
          </div>
          <pre style={s.commandPre}>
            {baseCell ? diffLines.map((d, i) => <span key={i} style={d.kind === "added" ? s.diffLineAdded : d.kind === "removed" ? s.diffLineRemoved : s.diffLineUnchanged}>
                {d.kind === "added" ? "+ " : d.kind === "removed" ? "- " : "  "}
                {d.line}{"\n"}
              </span>) : "# No verified base cell at the current Deployment selection.\n# Pick a supported hardware/variant in the Deployment panel to populate the playground base."}
          </pre>
          {pgMtpHint && <div style={s.mtpWarn}>
              ⚠️ Speculative decoding (MTP) is on — SGLang resets <code>--max-running-requests</code> to <strong>48</strong> when it isn't set. Add <code>--max-running-requests &lt;N&gt;</code> sized for your target concurrency.
            </div>}
        </div>
      </div>

      {}
      {pdRouter && routerText && <div style={s.card}>
          <div style={s.title}>Router (SGLang Model Gateway)</div>
          <div style={{
    fontSize: 11,
    opacity: 0.7,
    margin: "0 0 6px"
  }}>
            Run after both roles are up. Substitute <code>{"<prefill-host>"}</code> /{" "}
            <code>{"<decode-host>"}</code> with reachable hosts (both <code>127.0.0.1</code>{" "}
            on a same-host deployment). Client traffic (cURL) targets this router.
          </div>
          <div style={s.commandWrap}>
            <div style={s.commandHeader}>
              <div style={{
    fontSize: 11,
    opacity: 0.7
  }}>port {pdRouter.port}</div>
              <button style={s.iconButton} onClick={() => {
    navigator.clipboard.writeText(routerText);
    setRouterCopied(true);
    setTimeout(() => setRouterCopied(false), 1200);
  }}>
                {routerCopied ? "✓ Copied" : "⧉ Copy"}
              </button>
            </div>
            <pre style={s.commandPre}>{routerText}</pre>
          </div>
        </div>}

      {}
      {modal === "curl" && <dialog ref={openDialog} style={s.dialog} onClose={() => setModal(null)} onClick={onDialogClick}>
          <div style={s.modalHeader}>
            <div style={s.modalTitle}>cURL example</div>
            <button style={s.modalCloseBtn} onClick={() => setModal(null)} aria-label="Close">×</button>
          </div>
          <div style={s.commandWrap}>
            <div style={s.commandHeader}>
              <div style={{
    fontSize: 11,
    opacity: 0.7
  }}>
                Model: <code>{modelName || "(unresolved)"}</code>
              </div>
              <button style={s.iconButton} onClick={copyCurl}>
                {curlCopied ? "✓ Copied" : "⧉ Copy"}
              </button>
            </div>
            <pre style={s.commandPre}>{curlText}</pre>
          </div>
          {pdRouter && <p style={{
    fontSize: 11,
    opacity: 0.85,
    marginTop: 8
  }}>
              <strong>PD-Disaggregation active</strong> — this targets the router on
              {" "}<code>:{pdRouter.port}</code>; client traffic must not hit the role
              servers directly.
            </p>}
          <p style={{
    fontSize: 11,
    opacity: 0.7,
    marginTop: 8
  }}>
            Edit <code>CURL_HOST</code> / <code>CURL_PORT</code> in the Env panel.
          </p>
        </dialog>}

      {}
      {modal === "env" && <dialog ref={openDialog} style={s.dialog} onClose={() => setModal(null)} onClick={onDialogClick}>
          <div style={s.modalHeader}>
            <div style={s.modalTitle}>Env / placeholder values</div>
            <button style={s.modalCloseBtn} onClick={() => setModal(null)} aria-label="Close">×</button>
          </div>
            {placeholderGroups.curl.length > 0 && <div>
                <div style={s.sectionHeading}>cURL placeholders</div>
                {placeholderGroups.curl.map(({key, label}) => <div key={key} style={s.formField}>
                    <label style={s.formLabel}>
                      {label} <code style={{
    opacity: 0.6
  }}>{`{{${key}}}`}</code>
                    </label>
                    <input style={s.formInput} value={envDraft[key] ?? ""} onChange={e => setEnvDraft({
    ...envDraft,
    [key]: e.target.value
  })} />
                  </div>)}
              </div>}
            {placeholderGroups.command.length > 0 && <div>
                <div style={s.sectionHeading}>Command placeholders</div>
                {placeholderGroups.command.map(({key, label}) => <div key={key} style={s.formField}>
                    <label style={s.formLabel}>
                      {label} <code style={{
    opacity: 0.6
  }}>{`{{${key}}}`}</code>
                    </label>
                    <input style={s.formInput} value={envDraft[key] ?? ""} onChange={e => setEnvDraft({
    ...envDraft,
    [key]: e.target.value
  })} />
                  </div>)}
              </div>}
            <div style={{
    display: "flex",
    justifyContent: "flex-end",
    gap: 8,
    marginTop: 16
  }}>
              <button style={{
    ...s.iconButton,
    padding: "6px 14px"
  }} onClick={() => setModal(null)}>Cancel</button>
              <button style={s.primaryBtn} onClick={() => {
    saveEnv(envDraft);
    setModal(null);
  }}>Save</button>
            </div>
          <p style={{
    fontSize: 11,
    opacity: 0.7,
    marginTop: 10
  }}>
            Values persist in localStorage and are shared with the Deployment panel.
          </p>
        </dialog>}

      {}
      {modal === "submit" && <dialog ref={openDialog} style={s.dialog} onClose={() => setModal(null)} onClick={onDialogClick}>
            <div style={s.modalHeader}>
              <div style={s.modalTitle}>Submit verified cell</div>
              <button style={s.modalCloseBtn} onClick={() => setModal(null)} aria-label="Close">×</button>
            </div>
            <p style={{
    fontSize: 12,
    opacity: 0.85,
    marginTop: 0,
    marginBottom: 12
  }}>
              You've put together a combination that isn't in the verified
              catalog yet. After you've run the command end-to-end on the
              target hardware, this submits a pre-filled GitHub Issue that a
              maintainer can convert into a PR.
            </p>

            <div style={s.sectionHeading}>Combination</div>
            <code style={{
    fontFamily: "Menlo, monospace",
    fontSize: 12
  }}>
              {base.hw} / {base.variant} / {base.quant} / {base.strategy} / {base.nodes}
            </code>
            {}
            {(() => {
    const adds = diffLines.filter(d => d.kind === "added");
    const rems = diffLines.filter(d => d.kind === "removed");
    if (adds.length === 0 && rems.length === 0) return null;
    return <>
                  <div style={{
      ...s.sectionHeading,
      marginTop: 10
    }}>
                    Overrides vs base ({adds.length} added · {rems.length} removed)
                  </div>
                  <pre style={{
      margin: 0,
      padding: "8px 10px",
      background: isDark ? "#111827" : "#f5f5f5",
      border: `1px solid ${isDark ? "#374151" : "#e5e7eb"}`,
      borderRadius: 4,
      fontFamily: "'Menlo', 'Monaco', 'Courier New', monospace",
      fontSize: 12,
      lineHeight: 1.4,
      maxHeight: 160,
      overflowY: "auto",
      whiteSpace: "pre-wrap"
    }}>
                    {[...rems, ...adds].map((d, i) => <div key={i} style={d.kind === "added" ? s.diffLineAdded : s.diffLineRemoved}>
                        {d.kind === "added" ? "+ " : "- "}
                        {d.line.replace(/^\s*/, "")}
                      </div>)}
                  </pre>
                </>;
  })()}

            <div style={{
    ...s.sectionHeading,
    marginTop: 14
  }}>Attestation (all required)</div>
            <div style={s.formField}>
              <label style={{
    fontSize: 12,
    display: "flex",
    alignItems: "flex-start",
    gap: 6
  }}>
                <input type="checkbox" checked={submitAttest.ranCommand} onChange={e => setSubmitAttest({
    ...submitAttest,
    ranCommand: e.target.checked
  })} />
                I ran this exact command on the listed hardware.
              </label>
              <label style={{
    fontSize: 12,
    display: "flex",
    alignItems: "flex-start",
    gap: 6
  }}>
                <input type="checkbox" checked={submitAttest.reachedReady} onChange={e => setSubmitAttest({
    ...submitAttest,
    reachedReady: e.target.checked
  })} />
                The server reached READY and answered a cURL request successfully.
              </label>
              <label style={{
    fontSize: 12,
    display: "flex",
    alignItems: "flex-start",
    gap: 6
  }}>
                <input type="checkbox" checked={submitAttest.outputCorrect} onChange={e => setSubmitAttest({
    ...submitAttest,
    outputCorrect: e.target.checked
  })} />
                Output looked correct on at least one prompt.
              </label>
            </div>

            <div style={{
    ...s.sectionHeading,
    marginTop: 14
  }}>SGLang version (required)</div>
            <input style={{
    ...s.formInput,
    width: "100%",
    boxSizing: "border-box"
  }} placeholder="sglang==0.5.4  (or git SHA abc1234)" value={submitDraft.sglangVersion} onChange={e => setSubmitDraft({
    ...submitDraft,
    sglangVersion: e.target.value
  })} />

            <div style={{
    ...s.sectionHeading,
    marginTop: 14
  }}>Benchmark result (optional)</div>
            <input style={{
    ...s.formInput,
    width: "100%",
    boxSizing: "border-box"
  }} placeholder="TTFT 95 ms / TPOT 18 ms / 1820 tok/s @ bs=64" value={submitDraft.benchResult} onChange={e => setSubmitDraft({
    ...submitDraft,
    benchResult: e.target.value
  })} />

            <div style={{
    ...s.sectionHeading,
    marginTop: 14
  }}>Notes / caveats (optional)</div>
            <textarea style={{
    ...s.formInput,
    width: "100%",
    boxSizing: "border-box",
    minHeight: 110,
    resize: "vertical",
    fontFamily: "inherit"
  }} placeholder="Cluster config, env-var quirks, NIC mappings, multi-node bootstrap details, …" value={submitDraft.notes} onChange={e => setSubmitDraft({
    ...submitDraft,
    notes: e.target.value
  })} />

            <div style={{
    display: "flex",
    justifyContent: "flex-end",
    gap: 8,
    marginTop: 16,
    alignItems: "center"
  }}>
              {!submitReady && <span style={{
    fontSize: 11,
    opacity: 0.7,
    marginRight: "auto"
  }}>
                  Tick all attestations and fill SGLang version to enable submit.
                </span>}
              <button style={{
    ...s.iconButton,
    padding: "6px 14px"
  }} onClick={() => setModal(null)}>Cancel</button>
              <a href={submitReady ? submitUrl : undefined} target="_blank" rel="noopener noreferrer" onClick={e => {
    if (!submitReady) e.preventDefault(); else setModal(null);
  }} style={{
    ...s.primaryBtn,
    textDecoration: "none",
    display: "inline-flex",
    alignItems: "center",
    opacity: submitReady ? 1 : 0.4,
    cursor: submitReady ? "pointer" : "not-allowed"
  }}>
                Open submission on GitHub →
              </a>
            </div>
          <p style={{
    fontSize: 11,
    opacity: 0.7,
    marginTop: 10
  }}>
            The CTA opens a pre-filled GitHub Issue using the
            <code> 3-playground-verified-cell.yml</code> template. A
            maintainer with the listed hardware will review and convert it
            into a cookbook PR.
          </p>
        </dialog>}
    </div>;
};

export const benchmarks = (() => {
  // Benchmark table keyed by deployment cell (hw / variant / quant / strategy / nodes).
  // A row that carries measurement tuples expands to { match, sglang_version, speed };
  // a row without tuples expands to { match } only, i.e. no numbers recorded for that cell.
  const SGLANG_VERSION = "0.5.12.post1";
  const LOW_LATENCY = "low-latency";
  const BALANCED = "balanced";
  const HIGH_THROUGHPUT = "high-throughput";
  const SINGLE = "single";
  const MULTI_2 = "multi-2";
  // Expands one [max_concurrency, ttft_ms, tpot_ms, tokens_per_sec_per_gpu] tuple.
  // Every recorded run uses the same workload shape: random dataset, isl 8192, osl 1024.
  const toSpeedPoint = ([max_concurrency, ttft_ms, tpot_ms, tokens_per_sec_per_gpu]) => ({
    workload: {
      dataset: "random",
      isl: 8192,
      osl: 1024,
      max_concurrency
    },
    ttft_ms,
    tpot_ms,
    tokens_per_sec_per_gpu
  });
  // Builds one entry; `points` is omitted for cells that have no measurements.
  const entry = (hw, variant, quant, strategy, nodes, points) => {
    const match = {hw, variant, quant, strategy, nodes};
    if (!points) return {match};
    return {
      match,
      sglang_version: SGLANG_VERSION,
      speed: points.map(toSpeedPoint)
    };
  };
  return [
    // B200
    entry("b200", "flash", "fp4", LOW_LATENCY, SINGLE, [[1, 87, 3.68, 65], [16, 290, 6.21, 489]]),
    entry("b200", "flash", "fp4", BALANCED, SINGLE, [[64, 4228, 60.98, 225], [256, 4628, 88.25, 643]]),
    entry("b200", "flash", "fp4", HIGH_THROUGHPUT, SINGLE, [[1024, 99949, 67.46, 939], [4096, 253310, 66.11, 964]]),
    entry("b200", "pro", "fp4", LOW_LATENCY, SINGLE),
    entry("b200", "pro", "fp4", BALANCED, SINGLE, [[64, 2326, 69.9, 99], [256, 7242, 152.09, 192]]),
    entry("b200", "pro", "fp4", HIGH_THROUGHPUT, SINGLE),
    // B300
    entry("b300", "flash", "fp4", LOW_LATENCY, SINGLE, [[1, 88, 3.67, 66], [16, 266, 6.06, 495]]),
    entry("b300", "flash", "fp4", BALANCED, SINGLE, [[64, 2363, 34.4, 402], [256, 2812, 51.65, 1092]]),
    entry("b300", "flash", "fp4", HIGH_THROUGHPUT, SINGLE, [[1024, 97028, 65.09, 966], [4096, 243335, 63.98, 998]]),
    entry("b300", "pro", "fp4", LOW_LATENCY, SINGLE, [[1, 261, 5.01, 23], [16, 364, 11.37, 137]]),
    entry("b300", "pro", "fp4", BALANCED, SINGLE, [[64, 1866, 54.48, 139], [256, 6325, 123.95, 237]]),
    entry("b300", "pro", "fp4", HIGH_THROUGHPUT, SINGLE),
    // GB200
    entry("gb200", "flash", "fp4", LOW_LATENCY, SINGLE),
    entry("gb200", "flash", "fp4", BALANCED, SINGLE, [[64, 2560, 39.71, 342], [256, 3995, 82.56, 718]]),
    entry("gb200", "flash", "fp4", HIGH_THROUGHPUT, SINGLE),
    entry("gb200", "pro", "fp4", LOW_LATENCY, MULTI_2),
    entry("gb200", "pro", "fp4", BALANCED, MULTI_2),
    entry("gb200", "pro", "fp4", HIGH_THROUGHPUT, MULTI_2),
    // GB300
    entry("gb300", "flash", "fp4", LOW_LATENCY, SINGLE),
    entry("gb300", "flash", "fp4", BALANCED, SINGLE, [[64, 2671, 45.88, 299], [256, 4823, 94.04, 637]]),
    entry("gb300", "flash", "fp4", HIGH_THROUGHPUT, SINGLE, [[1024, 154868, 104.84, 621], [4096, 386489, 103.37, 627]]),
    entry("gb300", "pro", "fp4", LOW_LATENCY, SINGLE),
    entry("gb300", "pro", "fp4", BALANCED, SINGLE),
    entry("gb300", "pro", "fp4", HIGH_THROUGHPUT, SINGLE),
    // H200, FP8
    entry("h200", "flash", "fp8", LOW_LATENCY, SINGLE, [[1, 204, 3.38, 68], [16, 538, 11.42, 264]]),
    entry("h200", "flash", "fp8", BALANCED, SINGLE, [[64, 738, 36.27, 385], [256, 39806, 80.13, 393]]),
    entry("h200", "flash", "fp8", HIGH_THROUGHPUT, SINGLE, [[1024, 195293, 130.35, 493], [4096, 502615, 130.31, 490]]),
    entry("h200", "pro", "fp8", LOW_LATENCY, MULTI_2),
    entry("h200", "pro", "fp8", BALANCED, MULTI_2),
    entry("h200", "pro", "fp8", HIGH_THROUGHPUT, MULTI_2),
    // H200, FP4
    entry("h200", "flash", "fp4", LOW_LATENCY, SINGLE, [[1, 193, 3.38, 67], [16, 598, 10.46, 308]]),
    entry("h200", "flash", "fp4", BALANCED, SINGLE),
    entry("h200", "flash", "fp4", HIGH_THROUGHPUT, SINGLE),
    entry("h200", "pro", "fp4", LOW_LATENCY, SINGLE),
    entry("h200", "pro", "fp4", BALANCED, SINGLE),
    entry("h200", "pro", "fp4", HIGH_THROUGHPUT, SINGLE),
    // H100
    entry("h100", "flash", "fp4", LOW_LATENCY, SINGLE),
    entry("h100", "flash", "fp4", BALANCED, SINGLE),
    entry("h100", "flash", "fp4", HIGH_THROUGHPUT, SINGLE),
    entry("h100", "pro", "fp4", LOW_LATENCY, MULTI_2),
    entry("h100", "pro", "fp4", BALANCED, MULTI_2),
    entry("h100", "pro", "fp4", HIGH_THROUGHPUT, MULTI_2)
  ];
})();

export const config = {
  modelName: "DeepSeek-V4",
  supportedHardware: ["h100", "h200", "b200", "b300", "gb200", "gb300", "rtx6000"],
  hardware: [{
    id: "rtx6000",
    label: "RTX PRO 6000",
    vram: "96GB",
    vendor: "nvidia"
  }],
  variants: [{
    id: "flash",
    label: "Flash",
    subtitle: "284B"
  }, {
    id: "pro",
    label: "Pro",
    subtitle: "1.6T"
  }],
  quantizations: [{
    id: "fp8",
    label: "FP8"
  }, {
    id: "fp4",
    label: "FP4"
  }],
  strategies: [{
    id: "low-latency",
    label: "Low-Latency"
  }, {
    id: "balanced",
    label: "Balanced"
  }, {
    id: "high-throughput",
    label: "High-Throughput"
  }],
  nodesOptions: [{
    id: "single",
    label: "Single Node"
  }, {
    id: "multi-2",
    label: "Multi-Nodes"
  }],
  modelNames: {
    "flash|fp4": "deepseek-ai/DeepSeek-V4-Flash",
    "flash|fp8": "deepseek-ai/DeepSeek-V4-Flash",
    "pro|fp4": "deepseek-ai/DeepSeek-V4-Pro",
    "pro|fp8": "deepseek-ai/DeepSeek-V4-Pro",
    "h200|flash|fp8": "sgl-project/DeepSeek-V4-Flash-FP8",
    "h200|pro|fp8": "sgl-project/DeepSeek-V4-Pro-FP8"
  },
  placeholders: {
    HOST_IP: {
      target: "command",
      label: "Bind host",
      default: "0.0.0.0"
    },
    PORT: {
      target: "command",
      label: "Bind port",
      default: "30000"
    },
    NODE0_IP: {
      target: "command",
      label: "Head node IP",
      default: "<node0-ip>"
    },
    NODE_RANK: {
      target: "command",
      label: "This node rank",
      default: "<node-rank>"
    },
    HF_TOKEN: {
      target: "command",
      label: "HF token (Docker)",
      default: "<your-hf-token>"
    },
    CURL_HOST: {
      target: "curl",
      label: "Server host",
      default: "localhost"
    },
    CURL_PORT: {
      target: "curl",
      label: "Server port",
      default: "30000"
    }
  },
  curl: `curl http://{{CURL_HOST}}:{{CURL_PORT}}/v1/chat/completions \\
-H 'Content-Type: application/json' \\
-d '{ "model": "{{MODEL_NAME}}", "messages": [{"role":"user","content":"Hello"}] }'`,
  benchmarkCommands: {
    speed: `python3 -m sglang.bench_serving \\
  --backend sglang \\
  --host {{CURL_HOST}} --port {{CURL_PORT}} \\
  --model {{MODEL_NAME}} \\
  --dataset-name {{DATASET}} \\
  --random-input-len {{ISL}} --random-output-len {{OSL}} \\
  --num-prompts {{NUM_PROMPTS}} --max-concurrency {{MAX_CONCURRENCY}} \\
  --warmup-requests 64`,
    accuracy: {
      gsm8k_pct: `# To install sgl-eval: pip install git+https://github.com/sgl-project/sgl-eval
sgl-eval run gsm8k \\
  --base-url http://{{CURL_HOST}}:{{CURL_PORT}}/v1 \\
  --num-threads 32`,
      gpqa_pct: {
        flash: `# To install sgl-eval: pip install git+https://github.com/sgl-project/sgl-eval
sgl-eval run gpqa \\
  --model {{MODEL_NAME}} --api-key <api-key> \\
  --n-repeats 16 --max-tokens 200000 \\
  --temperature 1.0 --top-p 1.0 --thinking \\
  --out-dir /sgl-workspace/logs \\
  --base-url http://{{CURL_HOST}}:{{CURL_PORT}}/v1`,
        pro: `# To install sgl-eval: pip install git+https://github.com/sgl-project/sgl-eval
sgl-eval run gpqa \\
  --model {{MODEL_NAME}} --api-key <api-key> \\
  --n-repeats 16 --max-tokens 400000 \\
  --temperature 1.0 --top-p 1.0 --thinking \\
  --out-dir /sgl-workspace/logs \\
  --base-url http://{{CURL_HOST}}:{{CURL_PORT}}/v1`
      },
      aime25_pct: {
        flash: `# To install sgl-eval: pip install git+https://github.com/sgl-project/sgl-eval
sgl-eval run aime25 \\
  --model {{MODEL_NAME}} --api-key <api-key> \\
  --n-repeats 16 --max-tokens 200000 \\
  --temperature 1.0 --top-p 1.0 --thinking \\
  --out-dir /sgl-workspace/logs \\
  --base-url http://{{CURL_HOST}}:{{CURL_PORT}}/v1`,
        pro: `# To install sgl-eval: pip install git+https://github.com/sgl-project/sgl-eval
sgl-eval run aime25 \\
  --model {{MODEL_NAME}} --api-key <api-key> \\
  --n-repeats 16 --max-tokens 400000 \\
  --temperature 1.0 --top-p 1.0 --thinking \\
  --out-dir /sgl-workspace/logs \\
  --base-url http://{{CURL_HOST}}:{{CURL_PORT}}/v1`
      }
    },
    numPromptsByConc: {
      1: 32,
      16: 32,
      64: 128,
      256: 512,
      1024: 2048,
      4096: 4096
    }
  },
  defaultAccuracy: {
    flash: {
      gpqa_pct: 88.1,
      aime25_pct: 95,
      gsm8k_pct: 96.13
    },
    pro: {
      gpqa_pct: 90.1,
      aime25_pct: 97.5,
      gsm8k_pct: 96.13
    }
  },
  accuracyLabels: [["gpqa_pct", "GPQA Diamond", "%"], ["aime25_pct", "AIME25", "%"], ["gsm8k_pct", "GSM8K (1-shot)", "%"]],
  multiNodeHints: {
    gb200: ["The following env vars may be needed depending on your cluster:", "  GLOO_SOCKET_IFNAME=<your-nic>", "  NVSHMEM_ENABLE_NIC_PE_MAPPING=1", "  NVSHMEM_HCA_LIST=<your-hca-list>"]
  },
  dockerImages: {
    h100: "lmsysorg/sglang:latest",
    h200: "lmsysorg/sglang:latest",
    b200: "lmsysorg/sglang:latest",
    b300: "lmsysorg/sglang:latest",
    gb200: "lmsysorg/sglang:latest",
    gb300: "lmsysorg/sglang:latest"
  },
  github: {
    cookbookModel: "deepseek-ai/deepseek-v4"
  },
  playgroundFeatures: {
    attention: {
      knobs: [{
        id: "tp",
        label: "TP",
        values: [null, {
          value: 1,
          hide: {
            variant: ["pro"]
          }
        }, {
          value: 2,
          hide: {
            variant: ["pro"]
          }
        }, 4, 8, {
          value: 16,
          disable: {
            nodes: ["single"]
          },
          disableReason: "TP=16 requires 16 ranks — switch the Deploy panel's Nodes to Multi-Nodes first."
        }]
      }, {
        id: "cp",
        label: "CP",
        values: [null, 1, 2, 4]
      }, {
        id: "dpAttn",
        label: "DP-Attention",
        values: [null, false, {
          value: 1,
          hide: {
            variant: ["pro"]
          }
        }, {
          value: 2,
          hide: {
            variant: ["pro"]
          }
        }, 4, 8, {
          value: 16,
          disable: {
            nodes: ["single"]
          },
          disableReason: "DP-Attention=16 requires 16 ranks — switch the Deploy panel's Nodes to Multi-Nodes first."
        }],
        labels: {
          "auto": "Auto",
          "false": "Off"
        }
      }]
    },
    moe: {
      backend: {
        options: [{
          id: null,
          label: "Inherited"
        }, {
          id: "deepep",
          label: "DeepEP",
          flags: ["--moe-a2a-backend deepep"]
        }, {
          id: "megamoe",
          label: "MegaMoE",
          flags: ["--moe-a2a-backend megamoe"],
          requiresHw: ["b200", "b300", "gb200", "gb300"]
        }, {
          id: "flashinfer_mxfp4",
          label: "FlashInfer (MXFP4)",
          flags: ["--moe-runner-backend flashinfer_mxfp4"]
        }, {
          id: "marlin",
          label: "Marlin (W4A16)",
          flags: ["--moe-runner-backend marlin"]
        }]
      },
      megamoeQuant: {
        stripEnv: ["SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK"],
        options: [{
          id: "w4a8",
          label: "W4A8",
          env: ["SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK=8320"]
        }, {
          id: "w4a4",
          label: "W4A4",
          env: ["SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK=8320", "SGLANG_OPT_DEEPGEMM_MEGA_MOE_USE_FP4_ACTS=1", "SGLANG_OPT_DEEPGEMM_MEGA_MOE_USE_MXF4_KIND=1"]
        }]
      },
      ep: {
        label: "EP",
        values: [null, {
          value: 1,
          hide: {
            variant: ["pro"]
          }
        }, {
          value: 2,
          hide: {
            variant: ["pro"]
          }
        }, 4, 8, {
          value: 16,
          disable: {
            nodes: ["single"]
          },
          disableReason: "EP=16 requires 16 ranks — switch the Deploy panel's Nodes to Multi-Nodes first."
        }]
      }
    },
    parsers: {
      items: [{
        id: "reasoning",
        label: "Reasoning Parser",
        flag: "--reasoning-parser deepseek-v4"
      }, {
        id: "toolCall",
        label: "Tool Call Parser",
        flag: "--tool-call-parser deepseekv4"
      }]
    },
    speculative: {
      options: [{
        id: "current",
        label: "Inherited from base"
      }, {
        id: "off",
        label: "Off (greedy)"
      }, {
        id: "mtp-314",
        label: "EAGLE / MTP 3-1-4",
        flags: ["--speculative-algorithm EAGLE", "--speculative-num-steps 3", "--speculative-eagle-topk 1", "--speculative-num-draft-tokens 4"]
      }, {
        id: "mtp-112",
        label: "EAGLE / MTP 1-1-2",
        flags: ["--speculative-algorithm EAGLE", "--speculative-num-steps 1", "--speculative-eagle-topk 1", "--speculative-num-draft-tokens 2"]
      }, {
        id: "ngram",
        label: "NGRAM",
        flags: ["--speculative-algorithm NGRAM", "--speculative-num-draft-tokens 16", "--speculative-ngram-max-bfs-breadth 10"],
        disable: {
          dpAttnOn: [true]
        },
        disableReason: "NGRAM is incompatible with DP-Attention. Turn DP-Attention off in the Attention card above to use NGRAM."
      }, {
        id: "dflash",
        label: "DFlash",
        disabled: true,
        disableReason: "Coming soon — pending DFlash kernel integration."
      }]
    },
    pdDisagg: {
      modes: [{
        id: "off",
        label: "Off"
      }, {
        id: "prefill",
        label: "Prefill role"
      }, {
        id: "decode",
        label: "Decode role"
      }],
      transferBackends: [{
        id: "mooncake",
        label: "Mooncake",
        env: ["NCCL_MNNVL_ENABLE=1", "NCCL_CUMEM_ENABLE=1", "SGLANG_MOONCAKE_CUSTOM_MEM_POOL=True", "MC_FORCE_MNNVL=1"],
        envWhen: {
          hw: ["gb200", "gb300"]
        }
      }, {
        id: "nixl",
        label: "NiXL"
      }],
      ibDevices: [{
        id: "auto",
        label: "Auto"
      }, "mlx5_0", "mlx5_7"],
      router: {
        port: 8000,
        command: `python3 -m sglang_router.launch_router \\
  --pd-disaggregation \\
  --prefill http://<prefill-host>:{{PREFILL_PORT}} \\
  --decode http://<decode-host>:{{DECODE_PORT}} \\
  --host 0.0.0.0 --port {{ROUTER_PORT}} \\
  --disable-circuit-breaker \\
  --health-check-interval-secs 999999`
      }
    },
    hicache: {
      excludesHw: ["rtx6000"],
      backends: [{
        id: null,
        label: "Auto"
      }, {
        id: "file",
        label: "File"
      }, {
        id: "mooncake",
        label: "Mooncake"
      }, {
        id: "hf3fs",
        label: "HF3FS"
      }, {
        id: "nixl",
        label: "NiXL"
      }],
      writePolicies: [{
        id: "auto",
        label: "Auto"
      }, {
        id: "write_through",
        label: "Write-through"
      }, {
        id: "write_back",
        label: "Write-back"
      }, {
        id: "write_through_selective",
        label: "Write-through (selective)"
      }]
    },
    hisparse: {
      requiredFlags: ["--disable-radix-cache"],
      config: {
        top_k: 2048,
        device_buffer_size: 6144
      },
      hostRatios: [{
        id: 5,
        label: "5 (~1TB host)"
      }, {
        id: 10,
        label: "10 (~2TB host)"
      }],
      defaultHostRatio: 10
    }
  },
  cells: [{
    match: {
      hw: "b200",
      variant: "flash",
      quant: "fp4",
      strategy: "low-latency",
      nodes: "single"
    },
    verified: true,
    env: [],
    flags: ["--trust-remote-code", "--model-path {{MODEL_NAME}}", "--tp 4", "--moe-runner-backend flashinfer_mxfp4", "--speculative-algorithm EAGLE", "--speculative-num-steps 3", "--speculative-eagle-topk 1", "--speculative-num-draft-tokens 4", "--chunked-prefill-size 4096", "--disable-flashinfer-autotune", "--swa-full-tokens-ratio 0.1", "--host {{HOST_IP}}", "--port {{PORT}}"]
  }, {
    match: {
      hw: "b200",
      variant: "flash",
      quant: "fp4",
      strategy: "balanced",
      nodes: "single"
    },
    verified: true,
    env: ["SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK=1024"],
    flags: ["--trust-remote-code", "--model-path {{MODEL_NAME}}", "--tp 4", "--dp 4", "--enable-dp-attention", "--moe-a2a-backend deepep", "--speculative-algorithm EAGLE", "--speculative-num-steps 1", "--speculative-eagle-topk 1", "--speculative-num-draft-tokens 2", "--deepep-config '{\"normal_dispatch\":{\"num_sms\":96},\"normal_combine\":{\"num_sms\":96}}'", "--host {{HOST_IP}}", "--port {{PORT}}"]
  }, {
    match: {
      hw: "b200",
      variant: "flash",
      quant: "fp4",
      strategy: "high-throughput",
      nodes: "single"
    },
    verified: true,
    env: ["SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK=8320"],
    flags: ["--trust-remote-code", "--model-path {{MODEL_NAME}}", "--tp 4", "--dp 4", "--enable-dp-attention", "--moe-a2a-backend megamoe", "--host {{HOST_IP}}", "--port {{PORT}}"]
  }, {
    match: {
      hw: "b200",
      variant: "pro",
      quant: "fp4",
      strategy: "low-latency",
      nodes: "single"
    },
    verified: true,
    env: [],
    flags: ["--trust-remote-code", "--model-path {{MODEL_NAME}}", "--tp 8", "--moe-runner-backend flashinfer_mxfp4", "--speculative-algorithm EAGLE", "--speculative-num-steps 3", "--speculative-eagle-topk 1", "--speculative-num-draft-tokens 4", "--chunked-prefill-size 8192", "--disable-flashinfer-autotune", "--swa-full-tokens-ratio 0.1", "--mem-fraction-static 0.90", "--host {{HOST_IP}}", "--port {{PORT}}"]
  }, {
    match: {
      hw: "b200",
      variant: "pro",
      quant: "fp4",
      strategy: "balanced",
      nodes: "single"
    },
    verified: true,
    env: [],
    flags: ["--trust-remote-code", "--model-path {{MODEL_NAME}}", "--tp 8", "--dp 8", "--enable-dp-attention", "--moe-runner-backend flashinfer_mxfp4", "--disable-flashinfer-autotune", "--chunked-prefill-size 32768", "--swa-full-tokens-ratio 0.1", "--speculative-algorithm EAGLE", "--speculative-num-steps 1", "--speculative-eagle-topk 1", "--speculative-num-draft-tokens 2", "--mem-fraction-static 0.92", "--cuda-graph-max-bs 256", "--deepep-config '{\"normal_dispatch\":{\"num_sms\":96},\"normal_combine\":{\"num_sms\":96}}'", "--host {{HOST_IP}}", "--port {{PORT}}"]
  }, {
    match: {
      hw: "b200",
      variant: "pro",
      quant: "fp4",
      strategy: "high-throughput",
      nodes: "single"
    },
    verified: true,
    env: ["SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK=8320"],
    flags: ["--trust-remote-code", "--model-path {{MODEL_NAME}}", "--tp 8", "--dp 8", "--enable-dp-attention", "--moe-a2a-backend megamoe", "--mem-fraction-static 0.835", "--cuda-graph-max-bs 544", "--swa-full-tokens-ratio 0.075", "--chunked-prefill-size 65536", "--tokenizer-worker-num 8", "--enable-prefill-delayer", "--host {{HOST_IP}}", "--port {{PORT}}"]
  }, {
    match: {
      hw: "b300",
      variant: "flash",
      quant: "fp4",
      strategy: "low-latency",
      nodes: "single"
    },
    verified: true,
    env: [],
    flags: ["--trust-remote-code", "--model-path {{MODEL_NAME}}", "--tp 4", "--moe-runner-backend flashinfer_mxfp4", "--speculative-algorithm EAGLE", "--speculative-num-steps 3", "--speculative-eagle-topk 1", "--speculative-num-draft-tokens 4", "--chunked-prefill-size 4096", "--disable-flashinfer-autotune", "--swa-full-tokens-ratio 0.1", "--host {{HOST_IP}}", "--port {{PORT}}"]
  }, {
    match: {
      hw: "b300",
      variant: "flash",
      quant: "fp4",
      strategy: "balanced",
      nodes: "single"
    },
    verified: true,
    env: ["SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK=1024"],
    flags: ["--trust-remote-code", "--model-path {{MODEL_NAME}}", "--tp 4", "--dp 4", "--enable-dp-attention", "--moe-a2a-backend deepep", "--speculative-algorithm EAGLE", "--speculative-num-steps 1", "--speculative-eagle-topk 1", "--speculative-num-draft-tokens 2", "--deepep-config '{\"normal_dispatch\":{\"num_sms\":96},\"normal_combine\":{\"num_sms\":96}}'", "--host {{HOST_IP}}", "--port {{PORT}}"]
  }, {
    match: {
      hw: "b300",
      variant: "flash",
      quant: "fp4",
      strategy: "high-throughput",
      nodes: "single"
    },
    verified: true,
    env: ["SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK=8320"],
    flags: ["--trust-remote-code", "--model-path {{MODEL_NAME}}", "--tp 4", "--dp 4", "--enable-dp-attention", "--moe-a2a-backend megamoe", "--host {{HOST_IP}}", "--port {{PORT}}"]
  }, {
    match: {
      hw: "b300",
      variant: "pro",
      quant: "fp4",
      strategy: "low-latency",
      nodes: "single"
    },
    verified: true,
    env: [],
    flags: ["--trust-remote-code", "--model-path {{MODEL_NAME}}", "--tp 8", "--moe-runner-backend flashinfer_mxfp4", "--speculative-algorithm EAGLE", "--speculative-num-steps 3", "--speculative-eagle-topk 1", "--speculative-num-draft-tokens 4", "--chunked-prefill-size 8192", "--disable-flashinfer-autotune", "--swa-full-tokens-ratio 0.1", "--mem-fraction-static 0.90", "--host {{HOST_IP}}", "--port {{PORT}}"]
  }, {
    match: {
      hw: "b300",
      variant: "pro",
      quant: "fp4",
      strategy: "balanced",
      nodes: "single"
    },
    verified: true,
    env: [],
    flags: ["--trust-remote-code", "--model-path {{MODEL_NAME}}", "--tp 8", "--dp 8", "--enable-dp-attention", "--moe-runner-backend flashinfer_mxfp4", "--disable-flashinfer-autotune", "--chunked-prefill-size 32768", "--swa-full-tokens-ratio 0.1", "--speculative-algorithm EAGLE", "--speculative-num-steps 1", "--speculative-eagle-topk 1", "--speculative-num-draft-tokens 2", "--mem-fraction-static 0.92", "--cuda-graph-max-bs 256", "--deepep-config '{\"normal_dispatch\":{\"num_sms\":96},\"normal_combine\":{\"num_sms\":96}}'", "--host {{HOST_IP}}", "--port {{PORT}}"]
  }, {
    match: {
      hw: "b300",
      variant: "pro",
      quant: "fp4",
      strategy: "high-throughput",
      nodes: "single"
    },
    verified: true,
    env: ["SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK=8320"],
    flags: ["--trust-remote-code", "--model-path {{MODEL_NAME}}", "--tp 8", "--dp 8", "--enable-dp-attention", "--moe-a2a-backend megamoe", "--mem-fraction-static 0.835", "--cuda-graph-max-bs 544", "--swa-full-tokens-ratio 0.075", "--chunked-prefill-size 65536", "--tokenizer-worker-num 8", "--enable-prefill-delayer", "--host {{HOST_IP}}", "--port {{PORT}}"]
  }, {
    match: {
      hw: "gb200",
      variant: "flash",
      quant: "fp4",
      strategy: "low-latency",
      nodes: "single"
    },
    verified: true,
    env: [],
    flags: ["--trust-remote-code", "--model-path {{MODEL_NAME}}", "--tp 4", "--moe-runner-backend flashinfer_mxfp4", "--speculative-algorithm EAGLE", "--speculative-num-steps 3", "--speculative-eagle-topk 1", "--speculative-num-draft-tokens 4", "--chunked-prefill-size 4096", "--disable-flashinfer-autotune", "--swa-full-tokens-ratio 0.1", "--host {{HOST_IP}}", "--port {{PORT}}"]
  }, {
    match: {
      hw: "gb200",
      variant: "flash",
      quant: "fp4",
      strategy: "balanced",
      nodes: "single"
    },
    verified: true,
    env: ["SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK=1024"],
    flags: ["--trust-remote-code", "--model-path {{MODEL_NAME}}", "--tp 4", "--dp 4", "--enable-dp-attention", "--moe-a2a-backend deepep", "--speculative-algorithm EAGLE", "--speculative-num-steps 1", "--speculative-eagle-topk 1", "--speculative-num-draft-tokens 2", "--deepep-config '{\"normal_dispatch\":{\"num_sms\":96},\"normal_combine\":{\"num_sms\":96}}'", "--host {{HOST_IP}}", "--port {{PORT}}"]
  }, {
    match: {
      hw: "gb200",
      variant: "flash",
      quant: "fp4",
      strategy: "high-throughput",
      nodes: "single"
    },
    verified: true,
    env: ["SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK=8320"],
    flags: ["--trust-remote-code", "--model-path {{MODEL_NAME}}", "--tp 4", "--dp 4", "--enable-dp-attention", "--moe-a2a-backend megamoe", "--host {{HOST_IP}}", "--port {{PORT}}"]
  }, {
    match: {
      hw: "gb200",
      variant: "pro",
      quant: "fp4",
      strategy: "low-latency",
      nodes: "multi-2"
    },
    verified: true,
    env: ["NCCL_MNNVL_ENABLE=1", "NCCL_CUMEM_ENABLE=1", "SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK=256"],
    flags: ["--trust-remote-code", "--model-path {{MODEL_NAME}}", "--tp 8", "--moe-runner-backend flashinfer_mxfp4", "--speculative-algorithm EAGLE", "--speculative-num-steps 3", "--speculative-eagle-topk 1", "--speculative-num-draft-tokens 4", "--chunked-prefill-size 8192", "--disable-flashinfer-autotune", "--swa-full-tokens-ratio 0.1", "--mem-fraction-static 0.90", "--host {{HOST_IP}}", "--port {{PORT}}"]
  }, {
    match: {
      hw: "gb200",
      variant: "pro",
      quant: "fp4",
      strategy: "balanced",
      nodes: "multi-2"
    },
    verified: true,
    env: ["NCCL_MNNVL_ENABLE=1", "NCCL_CUMEM_ENABLE=1", "SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK=256"],
    flags: ["--trust-remote-code", "--model-path {{MODEL_NAME}}", "--tp 8", "--dp 8", "--enable-dp-attention", "--moe-a2a-backend deepep", "--speculative-algorithm EAGLE", "--speculative-num-steps 1", "--speculative-eagle-topk 1", "--speculative-num-draft-tokens 2", "--mem-fraction-static 0.78", "--cuda-graph-max-bs 64", "--max-running-requests 128", "--host {{HOST_IP}}", "--port {{PORT}}"]
  }, {
    match: {
      hw: "gb200",
      variant: "pro",
      quant: "fp4",
      strategy: "high-throughput",
      nodes: "multi-2"
    },
    verified: true,
    env: ["NCCL_MNNVL_ENABLE=1", "NCCL_CUMEM_ENABLE=1", "SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK=8320"],
    flags: ["--trust-remote-code", "--model-path {{MODEL_NAME}}", "--tp 8", "--dp 8", "--enable-dp-attention", "--moe-a2a-backend megamoe", "--mem-fraction-static 0.78", "--cuda-graph-max-bs 64", "--max-running-requests 256", "--host {{HOST_IP}}", "--port {{PORT}}"]
  }, {
    match: {
      hw: "gb300",
      variant: "flash",
      quant: "fp4",
      strategy: "low-latency",
      nodes: "single"
    },
    verified: true,
    env: [],
    flags: ["--trust-remote-code", "--model-path {{MODEL_NAME}}", "--tp 4", "--moe-runner-backend flashinfer_mxfp4", "--speculative-algorithm EAGLE", "--speculative-num-steps 3", "--speculative-eagle-topk 1", "--speculative-num-draft-tokens 4", "--chunked-prefill-size 4096", "--disable-flashinfer-autotune", "--swa-full-tokens-ratio 0.1", "--host {{HOST_IP}}", "--port {{PORT}}"]
  }, {
    match: {
      hw: "gb300",
      variant: "flash",
      quant: "fp4",
      strategy: "balanced",
      nodes: "single"
    },
    verified: true,
    env: ["SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK=1024"],
    flags: ["--trust-remote-code", "--model-path {{MODEL_NAME}}", "--tp 4", "--dp 4", "--enable-dp-attention", "--moe-a2a-backend deepep", "--speculative-algorithm EAGLE", "--speculative-num-steps 1", "--speculative-eagle-topk 1", "--speculative-num-draft-tokens 2", "--deepep-config '{\"normal_dispatch\":{\"num_sms\":96},\"normal_combine\":{\"num_sms\":96}}'", "--host {{HOST_IP}}", "--port {{PORT}}"]
  }, {
    match: {
      hw: "gb300",
      variant: "flash",
      quant: "fp4",
      strategy: "high-throughput",
      nodes: "single"
    },
    verified: true,
    env: ["SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK=8320"],
    flags: ["--trust-remote-code", "--model-path {{MODEL_NAME}}", "--tp 4", "--dp 4", "--enable-dp-attention", "--moe-a2a-backend megamoe", "--host {{HOST_IP}}", "--port {{PORT}}"]
  }, {
    match: {
      hw: "gb300",
      variant: "pro",
      quant: "fp4",
      strategy: "low-latency",
      nodes: "single"
    },
    verified: true,
    env: [],
    flags: ["--trust-remote-code", "--model-path {{MODEL_NAME}}", "--tp 4", "--moe-runner-backend flashinfer_mxfp4", "--speculative-algorithm EAGLE", "--speculative-num-steps 3", "--speculative-eagle-topk 1", "--speculative-num-draft-tokens 4", "--chunked-prefill-size 8192", "--disable-flashinfer-autotune", "--swa-full-tokens-ratio 0.1", "--mem-fraction-static 0.90", "--host {{HOST_IP}}", "--port {{PORT}}"]
  }, {
    match: {
      hw: "gb300",
      variant: "pro",
      quant: "fp4",
      strategy: "balanced",
      nodes: "single"
    },
    verified: true,
    env: ["SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK=256"],
    flags: ["--trust-remote-code", "--model-path {{MODEL_NAME}}", "--tp 4", "--dp 4", "--enable-dp-attention", "--moe-a2a-backend deepep", "--speculative-algorithm EAGLE", "--speculative-num-steps 1", "--speculative-eagle-topk 1", "--speculative-num-draft-tokens 2", "--mem-fraction-static 0.9", "--cuda-graph-max-bs 128", "--max-running-requests 256", "--deepep-config '{\"normal_dispatch\":{\"num_sms\":96},\"normal_combine\":{\"num_sms\":96}}'", "--host {{HOST_IP}}", "--port {{PORT}}"]
  }, {
    match: {
      hw: "gb300",
      variant: "pro",
      quant: "fp4",
      strategy: "high-throughput",
      nodes: "single"
    },
    verified: true,
    env: ["SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK=8320"],
    flags: ["--trust-remote-code", "--model-path {{MODEL_NAME}}", "--tp 4", "--dp 4", "--enable-dp-attention", "--moe-a2a-backend megamoe", "--mem-fraction-static 0.9", "--cuda-graph-max-bs 128", "--max-running-requests 256", "--host {{HOST_IP}}", "--port {{PORT}}"]
  }, {
    match: {
      hw: "h200",
      variant: "flash",
      quant: "fp8",
      strategy: "low-latency",
      nodes: "single"
    },
    verified: true,
    env: ["SGLANG_DSV4_FP4_EXPERTS=0"],
    flags: ["--trust-remote-code", "--model-path {{MODEL_NAME}}", "--tp 4", "--speculative-algorithm EAGLE", "--speculative-num-steps 3", "--speculative-eagle-topk 1", "--speculative-num-draft-tokens 4", "--host {{HOST_IP}}", "--port {{PORT}}"]
  }, {
    match: {
      hw: "h200",
      variant: "flash",
      quant: "fp8",
      strategy: "balanced",
      nodes: "single"
    },
    verified: true,
    env: ["SGLANG_DSV4_FP4_EXPERTS=0", "SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK=256"],
    flags: ["--trust-remote-code", "--model-path {{MODEL_NAME}}", "--tp 4", "--dp 4", "--enable-dp-attention", "--moe-a2a-backend deepep", "--speculative-algorithm EAGLE", "--speculative-num-steps 1", "--speculative-eagle-topk 1", "--speculative-num-draft-tokens 2", "--cuda-graph-max-bs 128", "--max-running-requests 128", "--deepep-config '{\"normal_dispatch\":{\"num_sms\":96},\"normal_combine\":{\"num_sms\":96}}'", "--host {{HOST_IP}}", "--port {{PORT}}"]
  }, {
    match: {
      hw: "h200",
      variant: "flash",
      quant: "fp8",
      strategy: "high-throughput",
      nodes: "single"
    },
    verified: true,
    env: ["SGLANG_DSV4_FP4_EXPERTS=0", "SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK=256"],
    flags: ["--trust-remote-code", "--model-path {{MODEL_NAME}}", "--tp 4", "--dp 4", "--enable-dp-attention", "--moe-a2a-backend deepep", "--cuda-graph-max-bs 128", "--max-running-requests 256", "--deepep-config '{\"normal_dispatch\":{\"num_sms\":96},\"normal_combine\":{\"num_sms\":96}}'", "--host {{HOST_IP}}", "--port {{PORT}}"]
  }, {
    match: {
      hw: "h200",
      variant: "pro",
      quant: "fp8",
      strategy: "low-latency",
      nodes: "multi-2"
    },
    verified: true,
    env: ["SGLANG_DSV4_FP4_EXPERTS=0", "SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK=128"],
    flags: ["--trust-remote-code", "--model-path {{MODEL_NAME}}", "--tp 16", "--dp 16", "--enable-dp-attention", "--moe-a2a-backend deepep", "--cuda-graph-max-bs 8", "--max-running-requests 32", "--speculative-algorithm EAGLE", "--speculative-num-steps 3", "--speculative-eagle-topk 1", "--speculative-num-draft-tokens 4", "--mem-fraction-static 0.88", "--host {{HOST_IP}}", "--port {{PORT}}"]
  }, {
    match: {
      hw: "h200",
      variant: "pro",
      quant: "fp8",
      strategy: "balanced",
      nodes: "multi-2"
    },
    verified: true,
    env: ["SGLANG_DSV4_FP4_EXPERTS=0", "SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK=128"],
    flags: ["--trust-remote-code", "--model-path {{MODEL_NAME}}", "--tp 16", "--dp 16", "--enable-dp-attention", "--moe-a2a-backend deepep", "--speculative-algorithm EAGLE", "--speculative-num-steps 1", "--speculative-eagle-topk 1", "--speculative-num-draft-tokens 2", "--mem-fraction-static 0.88", "--cuda-graph-max-bs 8", "--max-running-requests 32", "--host {{HOST_IP}}", "--port {{PORT}}"]
  }, {
    match: {
      hw: "h200",
      variant: "pro",
      quant: "fp8",
      strategy: "high-throughput",
      nodes: "multi-2"
    },
    verified: true,
    env: ["SGLANG_DSV4_FP4_EXPERTS=0", "SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK=128"],
    flags: ["--trust-remote-code", "--model-path {{MODEL_NAME}}", "--tp 16", "--dp 16", "--enable-dp-attention", "--moe-a2a-backend deepep", "--mem-fraction-static 0.88", "--cuda-graph-max-bs 128", "--max-running-requests 256", "--host {{HOST_IP}}", "--port {{PORT}}"]
  }, {
    match: {
      hw: "h200",
      variant: "flash",
      quant: "fp4",
      strategy: "low-latency",
      nodes: "single"
    },
    verified: true,
    env: [],
    flags: ["--trust-remote-code", "--model-path {{MODEL_NAME}}", "--tp 4", "--moe-runner-backend marlin", "--speculative-algorithm EAGLE", "--speculative-num-steps 3", "--speculative-eagle-topk 1", "--speculative-num-draft-tokens 4", "--host {{HOST_IP}}", "--port {{PORT}}"]
  }, {
    match: {
      hw: "h200",
      variant: "flash",
      quant: "fp4",
      strategy: "balanced",
      nodes: "single"
    },
    verified: true,
    env: [],
    flags: ["--trust-remote-code", "--model-path {{MODEL_NAME}}", "--tp 4", "--moe-runner-backend flashinfer_mxfp4", "--speculative-algorithm EAGLE", "--speculative-num-steps 1", "--speculative-eagle-topk 1", "--speculative-num-draft-tokens 2", "--host {{HOST_IP}}", "--port {{PORT}}"]
  }, {
    match: {
      hw: "h200",
      variant: "flash",
      quant: "fp4",
      strategy: "high-throughput",
      nodes: "single"
    },
    verified: true,
    env: [],
    flags: ["--trust-remote-code", "--model-path {{MODEL_NAME}}", "--tp 4", "--moe-runner-backend marlin", "--host {{HOST_IP}}", "--port {{PORT}}"]
  }, {
    match: {
      hw: "h200",
      variant: "pro",
      quant: "fp4",
      strategy: "low-latency",
      nodes: "single"
    },
    verified: true,
    env: [],
    flags: ["--trust-remote-code", "--model-path {{MODEL_NAME}}", "--tp 8", "--moe-runner-backend flashinfer_mxfp4", "--speculative-algorithm EAGLE", "--speculative-num-steps 3", "--speculative-eagle-topk 1", "--speculative-num-draft-tokens 4", "--mem-fraction-static 0.83", "--host {{HOST_IP}}", "--port {{PORT}}"]
  }, {
    match: {
      hw: "h200",
      variant: "pro",
      quant: "fp4",
      strategy: "balanced",
      nodes: "single"
    },
    verified: true,
    env: [],
    flags: ["--trust-remote-code", "--model-path {{MODEL_NAME}}", "--tp 8", "--moe-runner-backend flashinfer_mxfp4", "--speculative-algorithm EAGLE", "--speculative-num-steps 1", "--speculative-eagle-topk 1", "--speculative-num-draft-tokens 2", "--mem-fraction-static 0.88", "--host {{HOST_IP}}", "--port {{PORT}}"]
  }, {
    match: {
      hw: "h200",
      variant: "pro",
      quant: "fp4",
      strategy: "high-throughput",
      nodes: "single"
    },
    verified: true,
    env: [],
    flags: ["--trust-remote-code", "--model-path {{MODEL_NAME}}", "--tp 8", "--moe-runner-backend flashinfer_mxfp4", "--mem-fraction-static 0.88", "--host {{HOST_IP}}", "--port {{PORT}}"]
  }, {
    match: {
      hw: "h100",
      variant: "flash",
      quant: "fp4",
      strategy: "low-latency",
      nodes: "single"
    },
    verified: true,
    env: [],
    flags: ["--trust-remote-code", "--model-path {{MODEL_NAME}}", "--tp 8", "--moe-runner-backend marlin", "--speculative-algorithm EAGLE", "--speculative-num-steps 3", "--speculative-eagle-topk 1", "--speculative-num-draft-tokens 4", "--host {{HOST_IP}}", "--port {{PORT}}"]
  }, {
    match: {
      hw: "h100",
      variant: "flash",
      quant: "fp4",
      strategy: "balanced",
      nodes: "single"
    },
    verified: true,
    env: [],
    flags: ["--trust-remote-code", "--model-path {{MODEL_NAME}}", "--tp 8", "--moe-runner-backend marlin", "--speculative-algorithm EAGLE", "--speculative-num-steps 1", "--speculative-eagle-topk 1", "--speculative-num-draft-tokens 2", "--host {{HOST_IP}}", "--port {{PORT}}"]
  }, {
    match: {
      hw: "h100",
      variant: "flash",
      quant: "fp4",
      strategy: "high-throughput",
      nodes: "single"
    },
    verified: true,
    env: [],
    flags: ["--trust-remote-code", "--model-path {{MODEL_NAME}}", "--tp 8", "--moe-runner-backend marlin", "--host {{HOST_IP}}", "--port {{PORT}}"]
  }, {
    match: {
      hw: "h100",
      variant: "pro",
      quant: "fp4",
      strategy: "low-latency",
      nodes: "multi-2"
    },
    verified: true,
    env: ["SGLANG_SHARED_EXPERT_TP1=1"],
    flags: ["--trust-remote-code", "--model-path {{MODEL_NAME}}", "--tp 16", "--moe-runner-backend marlin", "--speculative-algorithm EAGLE", "--speculative-num-steps 3", "--speculative-eagle-topk 1", "--speculative-num-draft-tokens 4", "--mem-fraction-static 0.9", "--cuda-graph-max-bs 8", "--max-running-requests 32", "--host {{HOST_IP}}", "--port {{PORT}}"]
  }, {
    match: {
      hw: "h100",
      variant: "pro",
      quant: "fp4",
      strategy: "balanced",
      nodes: "multi-2"
    },
    verified: true,
    env: ["SGLANG_SHARED_EXPERT_TP1=1"],
    flags: ["--trust-remote-code", "--model-path {{MODEL_NAME}}", "--tp 16", "--moe-runner-backend marlin", "--speculative-algorithm EAGLE", "--speculative-num-steps 1", "--speculative-eagle-topk 1", "--speculative-num-draft-tokens 2", "--mem-fraction-static 0.9", "--cuda-graph-max-bs 8", "--max-running-requests 32", "--host {{HOST_IP}}", "--port {{PORT}}"]
  }, {
    match: {
      hw: "h100",
      variant: "pro",
      quant: "fp4",
      strategy: "high-throughput",
      nodes: "multi-2"
    },
    verified: true,
    env: ["SGLANG_SHARED_EXPERT_TP1=1"],
    flags: ["--trust-remote-code", "--model-path {{MODEL_NAME}}", "--tp 16", "--moe-runner-backend marlin", "--mem-fraction-static 0.9", "--host {{HOST_IP}}", "--port {{PORT}}"]
  }, {
    match: {
      hw: "rtx6000",
      variant: "flash",
      quant: "fp4",
      strategy: "low-latency",
      nodes: "single"
    },
    verified: true,
    env: [],
    flags: ["--trust-remote-code", "--model-path {{MODEL_NAME}}", "--tp 4", "--moe-runner-backend marlin", "--mem-fraction-static 0.70", "--cuda-graph-max-bs 32", "--host {{HOST_IP}}", "--port {{PORT}}"]
  }]
};

export const Deployment = ({config, benchmarks}) => {
  if (!config) {
    return <div style={{
      padding: 12,
      color: "#b91c1c"
    }}>Deployment: missing <code>config</code> prop</div>;
  }
  // Hardware entries grouped under a vendor key (`nvidia`, `amd`).
  // Each row below is [id, label, vram] and is expanded into an
  // `{ id, label, vram }` object, preserving row order within a vendor.
  const HARDWARE_CATALOG = Object.fromEntries(
    [
      ["nvidia", [
        ["h100", "H100", "80GB"],
        ["h200", "H200", "141GB"],
        ["b200", "B200", "192GB"],
        ["b300", "B300", "288GB"],
        ["gb200", "GB200", "192GB"],
        ["gb300", "GB300", "288GB"]
      ]],
      ["amd", [
        ["mi300x", "MI300X", "192GB"],
        ["mi325x", "MI325X", "256GB"],
        ["mi350x", "MI350X", "288GB"],
        ["mi355x", "MI355X", "288GB"]
      ]]
    ].map(([vendor, rows]) => [
      vendor,
      rows.map(([id, label, vram]) => ({ id, label, vram }))
    ])
  );
  const makeStyles = isDark => ({
    container: {
      maxWidth: "900px",
      margin: "0 auto",
      display: "flex",
      flexDirection: "column",
      gap: "3px"
    },
    card: {
      padding: "5px 10px",
      border: `1px solid ${isDark ? "#374151" : "#e5e7eb"}`,
      borderLeft: `3px solid ${isDark ? "#E85D4D" : "#D45D44"}`,
      borderRadius: "4px",
      display: "flex",
      alignItems: "center",
      gap: "10px",
      background: isDark ? "#1f2937" : "#fff"
    },
    cardColumn: {
      padding: "5px 10px",
      border: `1px solid ${isDark ? "#374151" : "#e5e7eb"}`,
      borderLeft: `3px solid ${isDark ? "#E85D4D" : "#D45D44"}`,
      borderRadius: "4px",
      display: "flex",
      flexDirection: "column",
      gap: "4px",
      background: isDark ? "#1f2937" : "#fff"
    },
    title: {
      fontSize: "12px",
      fontWeight: "600",
      minWidth: "108px",
      flexShrink: 0,
      color: isDark ? "#e5e7eb" : "inherit"
    },
    vendorRow: {
      display: "flex",
      alignItems: "center",
      gap: "6px"
    },
    vendorLabel: {
      fontSize: "10px",
      fontWeight: "600",
      color: isDark ? "#9ca3af" : "#6b7280",
      minWidth: "38px",
      textTransform: "uppercase",
      letterSpacing: "0.04em"
    },
    itemsGrid: () => ({
      display: "grid",
      gridTemplateColumns: "repeat(auto-fit, minmax(72px, 1fr))",
      gap: "4px",
      flex: 1
    }),
    labelBase: {
      padding: "2px 8px",
      border: `1px solid ${isDark ? "#9ca3af" : "#d1d5db"}`,
      borderRadius: "3px",
      cursor: "pointer",
      display: "inline-flex",
      flexDirection: "column",
      alignItems: "center",
      justifyContent: "center",
      fontWeight: "500",
      fontSize: "12px",
      transition: "all 0.2s",
      userSelect: "none",
      minHeight: "26px",
      textAlign: "center",
      background: isDark ? "#374151" : "#fff",
      color: isDark ? "#e5e7eb" : "inherit"
    },
    checked: {
      background: "#D45D44",
      color: "white",
      borderColor: "#D45D44"
    },
    disabled: {
      cursor: "not-allowed",
      opacity: 0.4
    },
    subtitle: {
      display: "block",
      fontSize: "9px",
      marginTop: "1px",
      lineHeight: "1.1",
      opacity: 0.7
    },
    commandWrap: {
      position: "relative",
      flex: 1,
      background: isDark ? "#111827" : "#f5f5f5",
      borderRadius: "6px",
      border: `1px solid ${isDark ? "#374151" : "#e5e7eb"}`,
      overflow: "hidden"
    },
    commandHeader: {
      display: "flex",
      flexWrap: "wrap",
      justifyContent: "space-between",
      alignItems: "center",
      gap: "6px 10px",
      padding: "6px 10px",
      borderBottom: `1px solid ${isDark ? "#374151" : "#e5e7eb"}`,
      background: isDark ? "#1f2937" : "#fafafa"
    },
    commandPre: {
      padding: "12px 16px",
      fontFamily: "'Menlo', 'Monaco', 'Courier New', monospace",
      fontSize: "12px",
      lineHeight: "1.5",
      color: isDark ? "#e5e7eb" : "#374151",
      whiteSpace: "pre-wrap",
      overflowX: "auto",
      margin: 0
    },
    mtpWarn: {
      margin: "8px 0 0",
      padding: "8px 12px",
      borderRadius: "8px",
      fontSize: "12px",
      lineHeight: "1.45",
      background: isDark ? "#78350f" : "#fef3c7",
      color: isDark ? "#fde68a" : "#92400e",
      border: `1px solid ${isDark ? "#92400e" : "#fcd34d"}`
    },
    badge: verified => ({
      display: "inline-flex",
      alignItems: "center",
      gap: "6px",
      padding: "2px 8px",
      borderRadius: "10px",
      background: verified ? isDark ? "#064e3b" : "#d1fae5" : isDark ? "#78350f" : "#fef3c7",
      color: verified ? isDark ? "#a7f3d0" : "#065f46" : isDark ? "#fde68a" : "#92400e",
      fontSize: "11px",
      fontWeight: 600
    }),
    badgeDot: verified => ({
      width: "8px",
      height: "8px",
      borderRadius: "50%",
      background: verified ? "#10b981" : "#f59e0b"
    }),
    iconButton: {
      padding: "4px 10px",
      border: `1px solid ${isDark ? "#4b5563" : "#d1d5db"}`,
      borderRadius: "4px",
      background: isDark ? "#1f2937" : "#fff",
      color: isDark ? "#e5e7eb" : "#374151",
      fontSize: "11px",
      fontWeight: 500,
      cursor: "pointer",
      display: "inline-flex",
      alignItems: "center",
      gap: "4px"
    },
    iconRow: {
      display: "inline-flex",
      flexWrap: "wrap",
      gap: "6px"
    },
    runModeWrap: {
      display: "inline-flex",
      border: `1px solid ${isDark ? "#4b5563" : "#d1d5db"}`,
      borderRadius: "10px",
      overflow: "hidden",
      fontSize: "11px",
      fontWeight: 600,
      userSelect: "none"
    },
    runModeChip: active => ({
      padding: "2px 10px",
      cursor: "pointer",
      background: active ? isDark ? "#1f2937" : "#fff" : "transparent",
      color: active ? isDark ? "#e5e7eb" : "#111827" : isDark ? "#9ca3af" : "#6b7280",
      borderRight: `1px solid ${isDark ? "#4b5563" : "#d1d5db"}`
    }),
    runModeChipLast: active => ({
      padding: "2px 10px",
      cursor: "pointer",
      background: active ? isDark ? "#1f2937" : "#fff" : "transparent",
      color: active ? isDark ? "#e5e7eb" : "#111827" : isDark ? "#9ca3af" : "#6b7280"
    }),
    headerLeft: {
      display: "inline-flex",
      flexWrap: "wrap",
      alignItems: "center",
      gap: "8px"
    },
    modalBackdrop: {
      position: "fixed",
      inset: 0,
      background: "rgba(0,0,0,0.5)",
      display: "flex",
      alignItems: "center",
      justifyContent: "center",
      zIndex: 9999
    },
    modalBox: {
      background: isDark ? "#1f2937" : "#fff",
      color: isDark ? "#e5e7eb" : "#111827",
      borderRadius: "8px",
      padding: "20px",
      maxWidth: "720px",
      width: "92%",
      maxHeight: "85vh",
      overflowY: "auto",
      border: `1px solid ${isDark ? "#374151" : "#e5e7eb"}`,
      boxShadow: "0 10px 25px rgba(0,0,0,0.25)"
    },
    modalHeader: {
      display: "flex",
      justifyContent: "space-between",
      alignItems: "center",
      marginBottom: "12px"
    },
    modalTitle: {
      fontSize: "15px",
      fontWeight: 600
    },
    modalCloseBtn: {
      background: "transparent",
      border: "none",
      color: "inherit",
      fontSize: "20px",
      cursor: "pointer",
      padding: "0 6px",
      lineHeight: 1
    },
    formField: {
      display: "flex",
      flexDirection: "column",
      gap: "4px",
      marginBottom: "10px"
    },
    formLabel: {
      fontSize: "12px",
      fontWeight: 500,
      color: isDark ? "#9ca3af" : "#4b5563"
    },
    formInput: {
      padding: "6px 10px",
      fontSize: "13px",
      border: `1px solid ${isDark ? "#4b5563" : "#d1d5db"}`,
      borderRadius: "4px",
      background: isDark ? "#111827" : "#fff",
      color: isDark ? "#e5e7eb" : "#111827",
      fontFamily: "'Menlo', 'Monaco', 'Courier New', monospace"
    },
    sectionHeading: {
      fontSize: "12px",
      fontWeight: 600,
      textTransform: "uppercase",
      letterSpacing: "0.04em",
      color: isDark ? "#9ca3af" : "#6b7280",
      margin: "12px 0 6px 0"
    },
    primaryBtn: {
      padding: "6px 14px",
      background: "#D45D44",
      color: "white",
      border: "none",
      borderRadius: "4px",
      cursor: "pointer",
      fontSize: "13px",
      fontWeight: 500
    },
    benchCard: {
      padding: "8px 12px",
      border: `1px solid ${isDark ? "#374151" : "#e5e7eb"}`,
      borderLeft: `3px solid ${isDark ? "#E85D4D" : "#D45D44"}`,
      borderRadius: "4px",
      background: isDark ? "#1f2937" : "#fff",
      display: "flex",
      flexDirection: "column",
      gap: "8px"
    },
    benchHeader: {
      display: "flex",
      flexWrap: "wrap",
      alignItems: "baseline",
      justifyContent: "space-between",
      gap: "6px 12px"
    },
    benchTitle: {
      fontSize: "13px",
      fontWeight: 600,
      color: isDark ? "#e5e7eb" : "inherit"
    },
    benchVersion: {
      fontSize: "11px",
      color: isDark ? "#9ca3af" : "#6b7280"
    },
    benchHeaderRight: {
      display: "flex",
      flexWrap: "wrap",
      alignItems: "center",
      gap: "6px 10px",
      flexShrink: 0
    },
    benchChipRow: {
      display: "flex",
      alignItems: "center",
      gap: "6px",
      flexWrap: "wrap",
      margin: "2px 0 8px"
    },
    benchChip: {
      padding: "2px 10px",
      fontSize: "12px",
      cursor: "pointer",
      border: `1px solid ${isDark ? "#4b5563" : "#d1d5db"}`,
      borderRadius: "4px",
      background: isDark ? "#1f2937" : "#fff",
      color: isDark ? "#e5e7eb" : "#374151",
      fontFamily: "'Menlo', 'Monaco', 'Courier New', monospace"
    },
    benchChipActive: {
      background: "#D45D44",
      color: "white",
      borderColor: "#D45D44"
    },
    benchBlock: {
      border: `1px solid ${isDark ? "#374151" : "#e5e7eb"}`,
      borderRadius: "4px",
      padding: "8px 10px",
      background: isDark ? "#111827" : "#fafafa"
    },
    benchBlockTitle: {
      fontSize: "11px",
      fontWeight: 600,
      textTransform: "uppercase",
      letterSpacing: "0.04em",
      color: isDark ? "#9ca3af" : "#6b7280",
      marginBottom: "4px"
    },
    benchWorkload: {
      fontSize: "11px",
      fontStyle: "italic",
      color: isDark ? "#9ca3af" : "#6b7280",
      marginBottom: "6px",
      lineHeight: "1.3"
    },
    benchRow: {
      display: "flex",
      justifyContent: "space-between",
      fontSize: "12px",
      padding: "2px 0"
    },
    benchKey: {
      color: isDark ? "#9ca3af" : "#6b7280"
    },
    benchVal: {
      color: isDark ? "#e5e7eb" : "#111827",
      fontFamily: "'Menlo', 'Monaco', 'Courier New', monospace",
      fontWeight: 500
    },
    benchNotes: {
      fontSize: "11px",
      fontStyle: "italic",
      color: isDark ? "#9ca3af" : "#6b7280"
    },
    benchLegend: {
      fontSize: "10px",
      fontStyle: "italic",
      color: isDark ? "#6b7280" : "#9ca3af",
      marginTop: "6px",
      fontFamily: "'Menlo', 'Monaco', 'Courier New', monospace"
    },
    benchEmpty: {
      fontSize: "12px",
      fontStyle: "italic",
      color: isDark ? "#9ca3af" : "#6b7280"
    },
    benchTable: {
      display: "grid",
      columnGap: 0,
      rowGap: "3px",
      marginTop: "4px",
      alignItems: "baseline"
    },
    benchTableHead: {
      textAlign: "right",
      fontWeight: 500,
      fontSize: "11px",
      color: isDark ? "#9ca3af" : "#6b7280",
      paddingLeft: "16px",
      paddingBottom: "4px",
      whiteSpace: "nowrap"
    },
    benchTableCornerHead: {
      paddingBottom: "4px"
    },
    benchTableSeparator: {
      gridColumn: "1 / -1",
      height: "1px",
      background: isDark ? "#374151" : "#e5e7eb",
      marginTop: "-3px"
    },
    benchTableLabel: {
      textAlign: "left",
      fontSize: "12px",
      color: isDark ? "#9ca3af" : "#6b7280",
      whiteSpace: "nowrap"
    },
    benchTableValue: {
      textAlign: "right",
      fontSize: "12px",
      color: isDark ? "#e5e7eb" : "#111827",
      fontFamily: "'Menlo', 'Monaco', 'Courier New', monospace",
      fontWeight: 500,
      paddingLeft: "16px",
      whiteSpace: "nowrap"
    },
    benchTableValueMissing: {
      color: isDark ? "#6b7280" : "#9ca3af"
    }
  });
  // Selection dimensions, ordered from highest to lowest priority: the selection
  // helpers in this component treat every dimension listed before a given one as
  // a constraint on it.
  const DIMENSIONS = ["hw", "variant", "quant", "strategy", "nodes"];
  // Returns the first cell whose `match` agrees with `sel` on every dimension,
  // or undefined when no cell fits.
  const findCell = (cells, sel) => {
    const fitsSelection = cell => !DIMENSIONS.some(dim => cell.match[dim] !== sel[dim]);
    return cells.find(fitsSelection);
  };
  // Looks up the benchmark entry matching `sel` on every dimension.
  // A missing list or a miss both yield null.
  const findBenchmark = (list, sel) => {
    const entries = list || [];
    const hit = entries.find(bench => DIMENSIONS.every(dim => bench.match[dim] === sel[dim]));
    return hit || null;
  };
  // Coerces a benchmark `speed` field into a list of measurements:
  // arrays pass through unchanged, a single object is wrapped, falsy becomes [].
  const normalizeSpeed = speed => {
    if (Array.isArray(speed)) return speed;
    return speed ? [speed] : [];
  };
  // Merges the per-variant default accuracy numbers from config with the
  // entry's own accuracy values (entry values win). No entry -> empty object.
  const effectiveAccuracy = (entry, sel) => {
    if (!entry) return {};
    const defaultsByVariant = config.defaultAccuracy;
    const variantDefaults = (defaultsByVariant && defaultsByVariant[sel.variant]) || {};
    const measured = entry.accuracy || {};
    return {
      ...variantDefaults,
      ...measured
    };
  };
  // True when neither the speed measurements (ignoring their `workload`
  // descriptor) nor the accuracy map carry any non-null value.
  const benchmarkIsEmpty = (entry, accuracy) => {
    const isSet = value => value !== null && value !== undefined;
    const measurementHasData = measurement => Boolean(measurement) && typeof measurement === "object" && Object.entries(measurement).some(([field, value]) => field !== "workload" && isSet(value));
    if (normalizeSpeed(entry && entry.speed).some(measurementHasData)) return false;
    const accuracyHasData = Boolean(accuracy) && typeof accuracy === "object" && Object.values(accuracy).some(value => isSet(value));
    return !accuracyHasData;
  };
  // An option is available when some cell offers `value` for `dim` while
  // agreeing with the current selection on every higher-priority dimension.
  const isOptionAvailable = (cells, sel, dim, value) => {
    const precedingDims = DIMENSIONS.slice(0, DIMENSIONS.indexOf(dim));
    const agreesOnPreceding = cell => precedingDims.every(d => cell.match[d] === sel[d]);
    return cells.some(cell => cell.match[dim] === value && agreesOnPreceding(cell));
  };
  // Applies `dim = value` to the selection and re-derives every lower-priority
  // dimension from the best-fitting cell: among cells that offer `value` and
  // agree on the higher-priority dimensions, pick the one preserving the most
  // of the current lower-priority choices (earliest cell wins ties).
  // Returns `sel` unchanged when no cell qualifies.
  const snapToValidCell = (cells, sel, dim, value) => {
    const position = DIMENSIONS.indexOf(dim);
    const before = DIMENSIONS.slice(0, position);
    const after = DIMENSIONS.slice(position + 1);
    const candidates = cells.filter(cell => cell.match[dim] === value && before.every(d => cell.match[d] === sel[d]));
    if (candidates.length === 0) return sel;
    const keptChoices = cell => after.filter(d => cell.match[d] === sel[d]).length;
    // Strict ">" keeps the earliest candidate when scores tie.
    const winner = candidates.reduce((top, cell) => keptChoices(cell) > keptChoices(top) ? cell : top, candidates[0]);
    const snapped = {
      ...sel,
      [dim]: value
    };
    after.forEach(d => {
      snapped[d] = winner.match[d];
    });
    return snapped;
  };
  // Walks the dimensions in priority order and keeps each requested value only
  // if some cell supports it together with the values already accepted;
  // otherwise falls back to the first compatible cell's value (or echoes the
  // request when no cell is compatible at all).
  const validateSelection = (cells, parsed) => {
    const valid = {};
    DIMENSIONS.forEach((dim, position) => {
      const acceptedDims = DIMENSIONS.slice(0, position);
      const compatible = cells.filter(cell => acceptedDims.every(d => cell.match[d] === valid[d]));
      const requested = parsed[dim];
      if (compatible.some(cell => cell.match[dim] === requested)) {
        valid[dim] = requested;
        return;
      }
      valid[dim] = compatible.length > 0 ? compatible[0].match[dim] : requested;
    });
    return valid;
  };
  /**
   * Resolves the model identifier for the current selection.
   *
   * Looks up `config.modelNames` first by the hardware-specific key
   * "hw|variant|quant", then by the generic "variant|quant" key.
   *
   * @param {{hw: string, variant: string, quant: string}} sel - current selection
   * @returns {string} the configured model name, or "" when nothing matches
   */
  const resolveModelName = sel => {
    // Tolerate a config without a modelNames table instead of throwing during
    // render, in line with how the other optional config tables are read.
    const names = config.modelNames || {};
    const hwSpecificKey = `${sel.hw}|${sel.variant}|${sel.quant}`;
    const genericKey = `${sel.variant}|${sel.quant}`;
    return names[hwSpecificKey] ?? names[genericKey] ?? "";
  };
  /**
   * Substitutes `{{KEY}}` placeholders in a template.
   *
   * `{{MODEL_NAME}}` is replaced by `modelName`; any other key is replaced by
   * the matching own property of `env`. Placeholders with no value (missing,
   * null or undefined) are left in the output untouched.
   *
   * @param {string} text - template containing `{{KEY}}` placeholders
   * @param {Object} env - placeholder values keyed by name
   * @param {string} modelName - value used for `{{MODEL_NAME}}`
   * @returns {string} the interpolated text
   */
  const interpolate = (text, env, modelName) => {
    const values = env || {};
    return text.replace(/{{(\w+)}}/g, (placeholder, key) => {
      if (key === "MODEL_NAME") return modelName;
      // Only own properties count: a plain lookup would resolve names such as
      // "toString" or "constructor" through the prototype chain and paste a
      // function's source text into the command.
      const value = Object.prototype.hasOwnProperty.call(values, key) ? values[key] : undefined;
      return value ?? placeholder;
    });
  };
  // Maps a nodes option id to a node count: "single" -> 1, "multi-<N>" -> N,
  // anything unrecognised -> 1.
  const parseNnodes = id => {
    const multi = id === "single" ? null : (/^multi-(\d+)$/).exec(id);
    return multi === null ? 1 : Number.parseInt(multi[1], 10);
  };
  /**
   * Builds the launch command text for the selected cell.
   *
   * @param {Object|undefined} cell - matched config cell (`flags`, `env`); falsy yields a placeholder comment
   * @param {Object} sel - current selection (`hw`, `nodes`, ... are read here)
   * @param {Object} envValues - placeholder values used for `{{KEY}}` interpolation
   * @param {string} [mode="python"] - "docker" emits a `docker run` wrapper, anything else a bare `sglang serve`
   * @returns {string} the multi-line shell command, with comment headers for multi-node runs
   */
  const renderCommand = (cell, sel, envValues, mode = "python") => {
    if (!cell) return "# No command available for the current selection.";
    // A flag entry is "--name value" or "--name=value"; this extracts "--name".
    const flagName = flag => flag.split(/[\s=]/)[0];
    const modelName = resolveModelName(sel);
    const nnodes = parseNnodes(sel.nodes);
    const multinode = nnodes > 1;
    const cellEnv = cell.env || [];
    const flags = [...(cell.flags || [])];
    if (multinode) {
      // Insert the distributed flags right after the first parallelism flag
      // found (in this preference order), else after --model-path, else at the
      // very top (findIndex -> -1, so the splice position becomes 0).
      const PARALLELISM_ANCHORS = ["--enable-dp-attention", "--dp", "--tp"];
      let anchorIdx = -1;
      for (const anchor of PARALLELISM_ANCHORS) {
        anchorIdx = flags.findIndex(f => flagName(f) === anchor);
        if (anchorIdx !== -1) break;
      }
      if (anchorIdx === -1) anchorIdx = flags.findIndex(f => f.startsWith("--model-path"));
      flags.splice(anchorIdx + 1, 0, `--nnodes ${nnodes}`, `--node-rank {{NODE_RANK}}`, `--dist-init-addr {{NODE0_IP}}:20000`);
    }
    let cmd;
    if (mode === "docker") {
      const image = config.dockerImages && config.dockerImages[sel.hw] || "lmsysorg/sglang:dev";
      const portFlag = flags.find(f => flagName(f) === "--port");
      // Strip the separator too: "--port=30000" must publish "30000", not
      // "=30000". A flag without a value falls back to the {{PORT}} placeholder.
      const portValue = portFlag ? portFlag.slice(("--port").length).replace(/^[\s=]+/, "").trim() : "";
      const servePort = portValue || "{{PORT}}";
      const dockerLines = ["docker run --gpus all", "  --shm-size 32g", multinode ? "  --network host" : `  -p ${servePort}:${servePort}`, "  -v ~/.cache/huggingface:/root/.cache/huggingface", `  --env "HF_TOKEN={{HF_TOKEN}}"`, ...cellEnv.map(e => `  --env ${e}`), "  --ipc=host", `  ${image}`, "  sglang serve", ...flags.map(f => "    " + f)];
      cmd = dockerLines.join(" \\\n");
    } else {
      const flagBlock = flags.map(f => "  " + f).join(" \\\n");
      const envBlock = cellEnv.length ? cellEnv.join(" \\\n") + " \\\n" : "";
      cmd = `${envBlock}sglang serve \\\n${flagBlock}`;
    }
    if (multinode && config.multiNodeHints && config.multiNodeHints[sel.hw]) {
      const hint = config.multiNodeHints[sel.hw].map(line => line.length ? "# " + line : "#").join("\n");
      cmd = `${hint}\n${cmd}`;
    }
    cmd = interpolate(cmd, envValues, modelName);
    if (multinode) {
      // Added after interpolation, so this header is never placeholder-expanded.
      const header = `# Multi-node (${nnodes} nodes). Run the same command on every node with:\n` + `#   <node-rank> = 0 on the head node, 1..${nnodes - 1} on the others\n` + `#   <node0-ip>  = IP of the head node (reachable from all others)`;
      cmd = `${header}\n${cmd}`;
    }
    return cmd;
  };
  // Accuracy metric descriptors taken from config (empty when absent).
  // Consumers in this component destructure each entry as [key, label, unit].
  const ACCURACY_LABELS = config.accuracyLabels || [];
  // Renders the "Benchmark" card for one benchmark entry: an accuracy table, a
  // speed table (one column per measurement), optional notes, and a
  // "Reproduce" button when benchmark commands can be built. Shows a
  // "data pending" message when the entry carries no values.
  // Reads `sel`, `s`, `setModal` and `buildBenchCommands` from the enclosing scope.
  const renderBenchmarkCard = entry => {
    // Speed rows as [measurement key, row label, unit, optional compute fn].
    // "interactivity" is derived: 1000 / tpot_ms rounded to one decimal, or
    // null when tpot_ms is missing or zero.
    const SPEED_LABELS = [["ttft_ms", "TTFT", "ms"], ["tpot_ms", "TPOT", "ms"], ["tokens_per_sec_per_gpu", "tokens/sec/GPU", ""], ["interactivity", "interactivity", "tok/s", m => m.tpot_ms != null && m.tpot_ms !== 0 ? Math.round(1000 / m.tpot_ms * 10) / 10 : null]];
    // Workload fields that may be summarised in the shared line or in column headers.
    const WORKLOAD_KEYS = ["dataset", "isl", "osl", "max_concurrency"];
    // Formats a value with its unit; null/undefined stay null so the table can
    // render them as missing.
    const fmt = (val, unit) => {
      if (val === null || val === undefined) return null;
      return `${val}${unit ? " " + unit : ""}`;
    };
    // Builds a comma-separated workload summary restricted to the fields named
    // in `keys` (a Set). isl/osl are shown together as "in/out=<isl>/<osl>",
    // with "?" standing in for whichever side is missing.
    const formatWorkloadParts = (workload, keys) => {
      if (!workload) return "";
      const parts = [];
      if (keys.has("dataset") && workload.dataset) parts.push(workload.dataset);
      if (keys.has("isl") || keys.has("osl")) {
        if (workload.isl != null || workload.osl != null) {
          parts.push(`in/out=${workload.isl != null ? workload.isl : "?"}/${workload.osl != null ? workload.osl : "?"}`);
        }
      }
      if (keys.has("max_concurrency") && workload.max_concurrency != null) {
        parts.push(`max-concurrency=${workload.max_concurrency}`);
      }
      return parts.join(", ");
    };
    // Fields that always go into the per-column header, even when identical
    // across measurements.
    const ALWAYS_PER_COLUMN = new Set(["max_concurrency"]);
    // Splits the workload fields into those shared by every measurement and
    // those that differ. Fields absent from all measurements are dropped; a
    // field present in only some measurements counts as differing because the
    // missing value (undefined) is recorded in `seen` as well.
    const partitionWorkload = measurements => {
      const shared = new Set();
      const differing = new Set();
      for (const k of WORKLOAD_KEYS) {
        const seen = new Set();
        let anyPresent = false;
        for (const m of measurements) {
          const v = m && m.workload ? m.workload[k] : undefined;
          if (v != null) anyPresent = true;
          seen.add(v);
        }
        if (!anyPresent) continue;
        if (ALWAYS_PER_COLUMN.has(k) || seen.size > 1) differing.add(k); else shared.add(k);
      }
      return {
        shared,
        differing
      };
    };
    // Renders one table block as a CSS grid: a label column followed by
    // `colCount` value columns. The header row and separator are emitted only
    // when at least one column header is non-empty; null cells render as "—"
    // with the "missing" style. Returns null when there are no rows.
    const renderBenchTable = ({title, sharedText, colHeaders, rows, colCount, legend}) => {
      if (rows.length === 0) return null;
      const showColHeaders = colHeaders.length > 0 && colHeaders.some(h => h !== "");
      return <div style={s.benchBlock}>
          <div style={s.benchBlockTitle}>{title}</div>
          {sharedText && <div style={s.benchWorkload}>{sharedText}</div>}
          <div style={{
        ...s.benchTable,
        gridTemplateColumns: `max-content repeat(${colCount}, minmax(0, 1fr))`
      }}>
            {showColHeaders && <div key="corner" style={s.benchTableCornerHead}></div>}
            {showColHeaders && colHeaders.map((h, i) => <div key={`hdr-${i}`} style={s.benchTableHead}>{h}</div>)}
            {showColHeaders && <div key="sep" style={s.benchTableSeparator}></div>}
            {rows.map(r => [<div key={`lbl-${r.label}`} style={s.benchTableLabel}>{r.label}</div>, ...r.values.map((v, i) => <div key={`val-${r.label}-${i}`} style={v === null ? {
        ...s.benchTableValue,
        ...s.benchTableValueMissing
      } : s.benchTableValue}>
                  {v !== null ? v : "—"}
                </div>)])}
          </div>
          {legend && <div style={s.benchLegend}>{legend}</div>}
        </div>;
    };
    // Table descriptor for the speed section: one column per measurement, one
    // row per SPEED_LABELS entry. Shared workload fields (taken from the first
    // measurement) go into `sharedText`, differing ones into the column headers.
    const buildSpeedTable = measurements => {
      if (measurements.length === 0) return null;
      const {shared, differing} = partitionWorkload(measurements);
      const sharedText = formatWorkloadParts(measurements[0] && measurements[0].workload, shared);
      const colHeaders = measurements.map(m => formatWorkloadParts(m && m.workload, differing));
      const rows = SPEED_LABELS.map(tup => {
        const [key, label, unit, compute] = tup;
        const values = measurements.map(m => {
          const raw = compute ? compute(m) : m[key];
          return fmt(raw, unit);
        });
        return {
          label,
          values
        };
      });
      return {
        title: "Speed",
        sharedText,
        colHeaders,
        rows,
        colCount: measurements.length,
        legend: "interactivity = 1000 / TPOT(ms)"
      };
    };
    // Table descriptor for the accuracy section: a single value column with one
    // row per ACCURACY_LABELS entry that has a value. Null when nothing is set.
    const buildAccuracyTable = accuracy => {
      if (!accuracy) return null;
      const rows = ACCURACY_LABELS.map(([key, label, unit]) => {
        const v = fmt(accuracy[key], unit);
        if (v === null) return null;
        return {
          label,
          values: [v]
        };
      }).filter(r => r !== null);
      if (rows.length === 0) return null;
      return {
        title: "Accuracy",
        sharedText: null,
        colHeaders: [],
        rows,
        colCount: 1
      };
    };
    // When the entry has no data at all, skip building tables and commands and
    // render the "pending" message instead.
    const accuracy = effectiveAccuracy(entry, sel);
    const isEmpty = benchmarkIsEmpty(entry, accuracy);
    const measurements = !isEmpty ? normalizeSpeed(entry && entry.speed) : [];
    const accuracyTable = !isEmpty ? buildAccuracyTable(accuracy) : null;
    const speedTable = !isEmpty ? buildSpeedTable(measurements) : null;
    const hasBenchCmds = !isEmpty && buildBenchCommands(entry, sel) !== null;
    return <div style={s.benchCard}>
        <div style={s.benchHeader}>
          <div style={s.benchTitle}>Benchmark</div>
          <div style={s.benchHeaderRight}>
            {!isEmpty && entry && entry.sglang_version && <div style={s.benchVersion}>measured on sglang <code>{entry.sglang_version}</code></div>}
            {hasBenchCmds && <button style={s.iconButton} onClick={() => setModal("bench")}>⚡ Reproduce</button>}
          </div>
        </div>
        {isEmpty ? <div style={s.benchEmpty}>
            Benchmark data pending for this combination — submit yours via the Playground's Submit ↗ button.
          </div> : <>
            {accuracyTable && renderBenchTable(accuracyTable)}
            {speedTable && renderBenchTable(speedTable)}
            {entry && entry.notes && <div style={s.benchNotes}>{entry.notes}</div>}
          </>}
      </div>;
  };
  // Collects the reproduction command templates that apply to a benchmark
  // entry: one accuracy template per metric that has a value, and a speed
  // template together with the concurrency levels that were measured.
  // Returns null when config has no benchmarkCommands or nothing applies.
  const buildBenchCommands = (entry, sel) => {
    const templates = config.benchmarkCommands;
    if (!templates) return null;
    const measuredAccuracy = effectiveAccuracy(entry, sel);
    const accuracy = !templates.accuracy ? [] : ACCURACY_LABELS.reduce((cmds, [key, label]) => {
      if (measuredAccuracy[key] == null) return cmds;
      // A template is either a plain string or a per-variant lookup table.
      const raw = templates.accuracy[key];
      const template = typeof raw === "string" ? raw : (raw && raw[sel.variant]) || null;
      if (template) cmds.push({
        key,
        label,
        template
      });
      return cmds;
    }, []);
    let speed = null;
    if (templates.speed && entry) {
      const withConcurrency = normalizeSpeed(entry.speed).filter(m => m && m.workload && m.workload.max_concurrency != null);
      const concurrencies = Array.from(new Set(withConcurrency.map(m => m.workload.max_concurrency)));
      concurrencies.sort((a, b) => a - b);
      if (concurrencies.length > 0) {
        // Prompt count for a concurrency level: the measured workload's own
        // num_prompts, else the config table, else max(2 * c, 200).
        const numPromptsOf = c => {
          const measured = withConcurrency.find(m => m.workload.max_concurrency === c);
          if (measured && measured.workload.num_prompts != null) return measured.workload.num_prompts;
          const table = templates.numPromptsByConc;
          return table && table[c] != null ? table[c] : Math.max(c * 2, 200);
        };
        speed = {
          template: templates.speed,
          concurrencies,
          workload: withConcurrency[0].workload,
          numPromptsOf
        };
      }
    }
    if (accuracy.length === 0 && !speed) return null;
    return {
      accuracy,
      speed
    };
  };
  // Builds the vendor-grouped hardware options for the picker: starts from a
  // copy of HARDWARE_CATALOG, overlays or appends the entries in
  // config.hardware (vendor defaults to "nvidia"), then keeps only the ids
  // listed in config.supportedHardware. Vendors left with no items are omitted.
  const buildHardwareGroups = () => {
    const supportedIds = new Set(config.supportedHardware);
    const byVendor = {};
    Object.entries(HARDWARE_CATALOG).forEach(([vendor, builtin]) => {
      // Copy each list so the shared catalog is never mutated by the overlay below.
      byVendor[vendor] = Array.from(builtin);
    });
    for (const hw of config.hardware || []) {
      const vendor = hw.vendor || "nvidia";
      if (!byVendor[vendor]) byVendor[vendor] = [];
      const bucket = byVendor[vendor];
      const override = {
        id: hw.id,
        label: hw.label,
        vram: hw.vram
      };
      const existingAt = bucket.findIndex(known => known.id === hw.id);
      if (existingAt === -1) {
        bucket.push(override);
      } else {
        bucket[existingAt] = override;
      }
    }
    return Object.entries(byVendor).reduce((groups, [vendor, list]) => {
      const items = list.filter(hw => supportedIds.has(hw.id)).map(({id, label, vram}) => ({
        id,
        label,
        subtitle: vram
      }));
      if (items.length > 0) groups.push({
        label: vendor.toUpperCase(),
        items
      });
      return groups;
    }, []);
  };
  // Default selection: the match tuple of the first configured cell, or an
  // all-empty selection when config.cells is empty.
  const initialSelectionFromCells = () => {
    const firstCell = config.cells[0];
    if (!firstCell) {
      return DIMENSIONS.reduce((blank, dim) => {
        blank[dim] = "";
        return blank;
      }, {});
    }
    const {hw, variant, quant, strategy, nodes} = firstCell.match;
    return {
      hw,
      variant,
      quant,
      strategy,
      nodes
    };
  };
  // Maps each placeholder in the schema to its declared default, using ""
  // when the default is null or undefined. A missing schema yields {}.
  const placeholderDefaults = schema => Object.entries(schema || ({})).reduce((defaults, [name, meta]) => {
    defaults[name] = meta.default ?? "";
    return defaults;
  }, {});
  // Tracks whether the page is in dark mode. Starts as false and is corrected
  // by the effect below once the component has mounted.
  const [isDark, setIsDark] = useState(false);
  useEffect(() => {
    // Dark mode is detected from the <html> element: a "dark" class, a
    // data-theme="dark" attribute, or an inline color-scheme of "dark".
    const check = () => {
      const html = document.documentElement;
      setIsDark(html.classList.contains("dark") || html.getAttribute("data-theme") === "dark" || html.style.colorScheme === "dark");
    };
    check();
    // Re-check whenever one of those three attributes changes on <html>.
    const observer = new MutationObserver(check);
    observer.observe(document.documentElement, {
      attributes: true,
      attributeFilter: ["class", "data-theme", "style"]
    });
    return () => observer.disconnect();
  }, []);
  // localStorage key under which the user's placeholder values are persisted.
  const STORAGE_KEY = "sglang-deploy-env";
  // Placeholder values used when interpolating commands; seeded with the
  // defaults declared in config.placeholders.
  const [env, setEnv] = useState(() => placeholderDefaults(config.placeholders));
  // On mount, overlay any previously saved values on top of the defaults.
  useEffect(() => {
    try {
      const raw = window.localStorage.getItem(STORAGE_KEY);
      if (raw) {
        const parsed = JSON.parse(raw);
        setEnv({
          ...placeholderDefaults(config.placeholders),
          ...parsed
        });
      }
    } catch {}
    // ^ Best effort: storage access or JSON parsing failures leave the defaults in place.
  }, []);
  // Applies new placeholder values to state and persists them to localStorage.
  // Persistence is best effort: serialization or storage failures are ignored.
  const saveEnv = updated => {
    setEnv(updated);
    try {
      const serialized = JSON.stringify(updated);
      window.localStorage.setItem(STORAGE_KEY, serialized);
    } catch {}
  };
  // Current selection across all dimensions, seeded from the first configured cell.
  const [sel, setSel] = useState(() => initialSelectionFromCells());
  // Hydrates the selection from the URL hash (e.g. "#hw=...&variant=...") on
  // mount and on every hashchange, then scrolls the deployment section into view.
  useEffect(() => {
    const hydrate = () => {
      const raw = window.location.hash.replace(/^#/, "");
      if (!raw) return;
      const params = new URLSearchParams(raw);
      const initial = initialSelectionFromCells();
      const parsed = {
        ...initial
      };
      let touched = false;
      params.forEach((value, key) => {
        // Own-property check only: the `in` operator also matches inherited
        // names ("toString", "constructor", ...), which would make an
        // unrelated hash look like a selection change.
        if (Object.prototype.hasOwnProperty.call(parsed, key)) {
          parsed[key] = value;
          touched = true;
        }
      });
      // Hashes that carry no selection dimension (plain anchors) are ignored.
      if (!touched) return;
      setSel(validateSelection(config.cells, parsed));
      const el = document.getElementById("deployment") || document.getElementById("deploy");
      if (el) el.scrollIntoView({
        behavior: "smooth",
        block: "start"
      });
    };
    hydrate();
    window.addEventListener("hashchange", hydrate);
    return () => window.removeEventListener("hashchange", hydrate);
  }, []);
  // Whenever the selection changes: mirror it into the URL hash (via
  // replaceState, so no history entry is added) and broadcast it on window as
  // a "sglang-deploy-sel" CustomEvent whose detail is the selection.
  useEffect(() => {
    const target = "#" + new URLSearchParams(sel).toString();
    if (window.location.hash !== target) {
      window.history.replaceState(null, "", target);
    }
    window.dispatchEvent(new CustomEvent("sglang-deploy-sel", {
      detail: sel
    }));
  }, [sel]);
  // Identifier of the open modal ("curl", "env" or "bench" in this component),
  // or null when none is open.
  const [modal, setModal] = useState(null);
  // While a modal is open: close it on Escape and lock page scrolling. The
  // cleanup restores the body's previous overflow value.
  useEffect(() => {
    if (modal === null) return;
    const onKey = e => {
      if (e.key === "Escape") setModal(null);
    };
    const prev = document.body.style.overflow;
    document.body.style.overflow = "hidden";
    window.addEventListener("keydown", onKey);
    return () => {
      window.removeEventListener("keydown", onKey);
      document.body.style.overflow = prev;
    };
  }, [modal]);
  // Transient "copied" feedback flags for the command and cURL copy buttons.
  const [copied, setCopied] = useState(false);
  const [curlCopied, setCurlCopied] = useState(false);
  // Working copy of the placeholder values edited in the Env modal; committed
  // to `env` only when the user saves.
  const [envDraft, setEnvDraft] = useState(env);
  // Benchmark modal state: chosen concurrency, chosen accuracy benchmark key,
  // and the key of the command that was most recently copied.
  const [benchConc, setBenchConc] = useState(null);
  const [benchAcc, setBenchAcc] = useState(null);
  const [benchCopied, setBenchCopied] = useState(null);
  // Output format of the rendered command: "python" or "docker".
  const [runMode, setRunMode] = useState("python");
  // Reset the draft from the saved values while the Env modal is open.
  useEffect(() => {
    if (modal === "env") setEnvDraft(env);
  }, [modal, env]);
  // Values derived on every render from the current state.
  const s = makeStyles(isDark);
  const cell = findCell(config.cells, sel);
  const command = renderCommand(cell, sel, env, runMode);
  // Show the MTP warning when the cell enables --speculative-algorithm without
  // also setting --max-running-requests.
  const mtpHint = !!cell && (cell.flags || []).some(f => f.split(/[\s=]/)[0] === "--speculative-algorithm") && !(cell.flags || []).some(f => f.split(/[\s=]/)[0] === "--max-running-requests");
  const modelName = resolveModelName(sel);
  const curlText = interpolate(config.curl || "", env, modelName);
  const hwGroups = buildHardwareGroups();
  // NOTE(review): `benchmarks` is not declared in this part of the file —
  // presumably a component prop; confirm.
  const benchEntry = benchmarks ? findBenchmark(benchmarks, sel) : null;
  // Whether `value` can be chosen for `dim` given the current selection.
  const isEnabled = (dim, value) => isOptionAvailable(config.cells, sel, dim, value);
  // Applies a user's choice for one dimension, snapping the lower-priority
  // dimensions to a valid cell based on the latest selection state.
  const handleSelect = (dim, value) => {
    const applyChoice = current => snapToValidCell(config.cells, current, dim, value);
    setSel(applyChoice);
  };
  // Copies the rendered command to the clipboard and shows "Copied" for 1.2s.
  // The feedback is shown only once the write has succeeded; a missing
  // Clipboard API (e.g. insecure context) or a rejected write is logged
  // instead of throwing or claiming success.
  const handleCopy = () => {
    const clipboard = typeof navigator !== "undefined" ? navigator.clipboard : undefined;
    if (!clipboard) {
      console.warn("Clipboard API unavailable; command was not copied.");
      return;
    }
    clipboard.writeText(command).then(() => {
      setCopied(true);
      setTimeout(() => setCopied(false), 1200);
    }, err => {
      console.warn("Failed to copy command to clipboard.", err);
    });
  };
  // Copies the cURL example to the clipboard and shows "Copied" for 1.2s.
  // The feedback is shown only once the write has succeeded; a missing
  // Clipboard API (e.g. insecure context) or a rejected write is logged
  // instead of throwing or claiming success.
  const copyCurl = () => {
    const clipboard = typeof navigator !== "undefined" ? navigator.clipboard : undefined;
    if (!clipboard) {
      console.warn("Clipboard API unavailable; cURL example was not copied.");
      return;
    }
    clipboard.writeText(curlText).then(() => {
      setCurlCopied(true);
      setTimeout(() => setCurlCopied(false), 1200);
    }, err => {
      console.warn("Failed to copy cURL example to clipboard.", err);
    });
  };
  // Copies a benchmark command to the clipboard and marks `key` as the
  // just-copied command for 1.2s. The feedback is shown only once the write
  // has succeeded; a missing Clipboard API (e.g. insecure context) or a
  // rejected write is logged instead of throwing or claiming success.
  const copyBench = (key, text) => {
    const clipboard = typeof navigator !== "undefined" ? navigator.clipboard : undefined;
    if (!clipboard) {
      console.warn("Clipboard API unavailable; benchmark command was not copied.");
      return;
    }
    clipboard.writeText(text).then(() => {
      setBenchCopied(key);
      setTimeout(() => setBenchCopied(null), 1200);
    }, err => {
      console.warn("Failed to copy benchmark command to clipboard.", err);
    });
  };
  // Placeholder descriptors from config grouped by their `target`. The
  // "command" and "curl" groups always exist; other targets get a group on
  // first use. Each descriptor is the placeholder's metadata plus its `key`.
  const placeholderGroups = Object.entries(config.placeholders || ({})).reduce((groups, [key, meta]) => {
    if (!groups[meta.target]) groups[meta.target] = [];
    groups[meta.target].push({
      key,
      ...meta
    });
    return groups;
  }, {
    command: [],
    curl: []
  });
  // Renders one selectable option as a <label> wrapping a hidden radio input.
  // `item` supplies id, label and an optional subtitle; `dim` is the dimension
  // the option belongs to; `selectedId` is the currently selected id for it.
  // Options that isEnabled() rejects are styled as disabled and their clicks
  // are swallowed.
  const renderButton = (item, dim, selectedId) => {
    const checked = selectedId === item.id;
    const disabled = !isEnabled(dim, item.id);
    return <label key={item.id} style={{
      ...s.labelBase,
      ...checked ? s.checked : {},
      ...disabled ? s.disabled : {}
    }} title={disabled ? "Not supported for current selection" : ""} onClick={e => {
      if (disabled) {
        e.preventDefault();
        return;
      }
      handleSelect(dim, item.id);
    }}>
        <input type="radio" checked={checked} disabled={disabled} readOnly style={{
      display: "none"
    }} />
        <span>{item.label}</span>
        {item.subtitle && <small style={{
      ...s.subtitle,
      color: checked ? "rgba(255,255,255,0.85)" : "inherit"
    }}>
            {item.subtitle}
          </small>}
      </label>;
  };
  // Renders a titled card with one button per option for a single dimension,
  // laid out in a grid sized by the number of options.
  const renderFlatSection = (title, options, dim, selectedId) => <div style={s.card}>
      <div style={s.title}>{title}</div>
      <div style={s.itemsGrid(options.length)}>
        {options.map(item => renderButton(item, dim, selectedId))}
      </div>
    </div>;
  // Item count of the widest vendor row; used as the shared column count so
  // every vendor grid lines up (shorter rows are padded with empty cells).
  // NOTE(review): with no hardware groups this evaluates to -Infinity; it
  // appears to be read only while mapping over hwGroups — confirm.
  const maxHwCols = Math.max(...hwGroups.map(x => x.items.length));
  return <div style={s.container} className="not-prose">
      {}
      <div style={s.cardColumn}>
        <div style={{
    ...s.title,
    marginBottom: "2px"
  }}>Hardware Platform</div>
        {hwGroups.map(g => <div key={g.label} style={s.vendorRow}>
            <div style={s.vendorLabel}>{g.label}</div>
            <div style={s.itemsGrid(maxHwCols)}>
              {g.items.map(item => renderButton(item, "hw", sel.hw))}
              {Array.from({
    length: maxHwCols - g.items.length
  }).map((_, i) => <div key={`pad-${i}`} />)}
            </div>
          </div>)}
      </div>

      {renderFlatSection("Model Variant", config.variants, "variant", sel.variant)}
      {renderFlatSection("Quantization", config.quantizations, "quant", sel.quant)}
      {renderFlatSection("Strategy", config.strategies, "strategy", sel.strategy)}
      {renderFlatSection("Nodes", config.nodesOptions, "nodes", sel.nodes)}

      {}
      <div style={s.card}>
        <div style={s.title}>Run this Command:</div>
        <div style={s.commandWrap}>
          <div style={s.commandHeader}>
            <div style={s.headerLeft}>
              <div style={s.badge(Boolean(cell && cell.verified))}>
                <span style={s.badgeDot(Boolean(cell && cell.verified))} />
                {cell && cell.verified ? "Verified" : "Not Verified"}
              </div>
              <div style={s.runModeWrap} role="tablist" aria-label="Output format">
                <span style={s.runModeChip(runMode === "python")} onClick={() => setRunMode("python")} role="tab" aria-selected={runMode === "python"}>
                  Python
                </span>
                <span style={s.runModeChipLast(runMode === "docker")} onClick={() => setRunMode("docker")} role="tab" aria-selected={runMode === "docker"}>
                  Docker
                </span>
              </div>
            </div>
            <div style={s.iconRow}>
              <button style={s.iconButton} onClick={handleCopy}>
                {copied ? "✓ Copied" : "⧉ Copy"}
              </button>
              <button style={s.iconButton} onClick={() => setModal("curl")}>$ cURL</button>
              <button style={s.iconButton} onClick={() => setModal("env")}>⚙ Env</button>
            </div>
          </div>
          <pre style={s.commandPre}>{command}</pre>
          {mtpHint && <div style={s.mtpWarn}>
              ⚠️ Speculative decoding (MTP) is on — SGLang resets <code>--max-running-requests</code> to <strong>48</strong> when it isn't set. Add <code>--max-running-requests &lt;N&gt;</code> sized for your target concurrency.
            </div>}
        </div>
      </div>

      {}
      {benchmarks && cell && renderBenchmarkCard(benchEntry)}

      {}
      <div style={{
    padding: "6px 12px",
    fontSize: "12px",
    color: isDark ? "#9ca3af" : "#6b7280",
    display: "flex",
    alignItems: "center",
    gap: "6px"
  }}>
        <span>Need to go beyond the verified matrix?</span>
        <button type="button" onClick={() => {
    const el = document.getElementById("playground");
    if (el) el.scrollIntoView({
      behavior: "smooth",
      block: "start"
    });
  }} style={{
    background: "transparent",
    border: "none",
    padding: 0,
    color: isDark ? "#FDBA74" : "#C2410C",
    cursor: "pointer",
    fontSize: "12px",
    fontWeight: 600,
    textDecoration: "underline",
    textUnderlineOffset: "2px"
  }}>
          Open the Playground →
        </button>
      </div>

      {}
      {modal === "curl" && <div style={s.modalBackdrop} onClick={() => setModal(null)}>
          <div style={s.modalBox} onClick={e => e.stopPropagation()}>
            <div style={s.modalHeader}>
              <div style={s.modalTitle}>cURL example</div>
              <button style={s.modalCloseBtn} onClick={() => setModal(null)} aria-label="Close">×</button>
            </div>
            <div style={s.commandWrap}>
              <div style={s.commandHeader}>
                <div style={{
    fontSize: 11,
    opacity: 0.7
  }}>
                  Model: <code>{modelName || "(unresolved)"}</code>
                </div>
                <button style={s.iconButton} onClick={copyCurl}>
                  {curlCopied ? "✓ Copied" : "⧉ Copy"}
                </button>
              </div>
              <pre style={s.commandPre}>{curlText}</pre>
            </div>
            <p style={{
    fontSize: 11,
    opacity: 0.7,
    marginTop: 8
  }}>
              Edit <code>CURL_HOST</code> / <code>CURL_PORT</code> in the Env panel.
            </p>
          </div>
        </div>}

      {}
      {modal === "env" && <div style={s.modalBackdrop} onClick={() => setModal(null)}>
          <div style={s.modalBox} onClick={e => e.stopPropagation()}>
            <div style={s.modalHeader}>
              <div style={s.modalTitle}>Env / placeholder values</div>
              <button style={s.modalCloseBtn} onClick={() => setModal(null)} aria-label="Close">×</button>
            </div>
            {placeholderGroups.curl.length > 0 && <div>
                <div style={s.sectionHeading}>cURL placeholders</div>
                {placeholderGroups.curl.map(({key, label}) => <div key={key} style={s.formField}>
                    <label style={s.formLabel}>
                      {label} <code style={{
    opacity: 0.6
  }}>{`{{${key}}}`}</code>
                    </label>
                    <input style={s.formInput} value={envDraft[key] ?? ""} onChange={e => setEnvDraft({
    ...envDraft,
    [key]: e.target.value
  })} />
                  </div>)}
              </div>}
            {placeholderGroups.command.length > 0 && <div>
                <div style={s.sectionHeading}>Command placeholders</div>
                {placeholderGroups.command.map(({key, label}) => <div key={key} style={s.formField}>
                    <label style={s.formLabel}>
                      {label} <code style={{
    opacity: 0.6
  }}>{`{{${key}}}`}</code>
                    </label>
                    <input style={s.formInput} value={envDraft[key] ?? ""} onChange={e => setEnvDraft({
    ...envDraft,
    [key]: e.target.value
  })} />
                  </div>)}
              </div>}
            <div style={{
    display: "flex",
    justifyContent: "flex-end",
    gap: 8,
    marginTop: 16
  }}>
              <button style={{
    ...s.iconButton,
    padding: "6px 14px"
  }} onClick={() => setModal(null)}>Cancel</button>
              <button style={s.primaryBtn} onClick={() => {
    saveEnv(envDraft);
    setModal(null);
  }}>Save</button>
            </div>
            <p style={{
    fontSize: 11,
    opacity: 0.7,
    marginTop: 10
  }}>
              Values persist in localStorage and are reused the next time you visit any cookbook.
            </p>
          </div>
        </div>}

      {}
      {modal === "bench" && benchEntry && (() => {
    const bc = buildBenchCommands(benchEntry, sel);
    if (!bc) return null;
    const selSummary = `${sel.hw.toUpperCase()} · ${sel.variant} · ${sel.quant.toUpperCase()} · ${sel.strategy} · ${sel.nodes}`;
    let selConc = null;
    let speedCmd = null;
    if (bc.speed) {
      selConc = bc.speed.concurrencies.includes(benchConc) ? benchConc : bc.speed.concurrencies[0];
      const w = bc.speed.workload;
      speedCmd = interpolate(bc.speed.template, {
        ...env,
        DATASET: w.dataset,
        ISL: w.isl,
        OSL: w.osl,
        MAX_CONCURRENCY: selConc,
        NUM_PROMPTS: bc.speed.numPromptsOf(selConc)
      }, modelName);
    }
    let selAcc = null;
    let accCmd = null;
    if (bc.accuracy.length > 0) {
      selAcc = bc.accuracy.find(a => a.key === benchAcc) || bc.accuracy[0];
      accCmd = interpolate(selAcc.template, env, modelName);
    }
    return <div style={s.modalBackdrop} onClick={() => setModal(null)}>
            <div style={s.modalBox} onClick={e => e.stopPropagation()}>
              <div style={s.modalHeader}>
                <div style={s.modalTitle}>Benchmark commands</div>
                <button style={s.modalCloseBtn} onClick={() => setModal(null)} aria-label="Close">×</button>
              </div>
              <p style={{
      fontSize: 11,
      opacity: 0.7,
      margin: "0 0 12px"
    }}>
                For <code>{selSummary}</code>. Start the server with the Deploy command above, then run these against it.
              </p>

              {selAcc && <div>
                  <div style={s.sectionHeading}>Accuracy</div>
                  {bc.accuracy.length > 1 && <div style={s.benchChipRow}>
                      <span style={{
      fontSize: 11,
      opacity: 0.7
    }}>benchmark:</span>
                      {bc.accuracy.map(a => <button key={a.key} style={{
      ...s.benchChip,
      ...a.key === selAcc.key ? s.benchChipActive : {}
    }} onClick={() => setBenchAcc(a.key)}>
                          {a.label}
                        </button>)}
                    </div>}
                  <div style={{
      ...s.commandWrap,
      marginBottom: 6
    }}>
                    <div style={s.commandHeader}>
                      <div style={{
      fontSize: 11,
      opacity: 0.7
    }}>{selAcc.label}</div>
                      <button style={s.iconButton} onClick={() => copyBench("acc", accCmd)}>
                        {benchCopied === "acc" ? "✓ Copied" : "⧉ Copy"}
                      </button>
                    </div>
                    <pre style={s.commandPre}>{accCmd}</pre>
                  </div>
                  {bc.accuracy.length > 1 && <p style={{
      fontSize: 11,
      opacity: 0.7,
      margin: "0 0 4px"
    }}>
                      Switch the benchmark chip to see each eval's command.
                    </p>}
                </div>}

              {bc.speed && <div>
                  <div style={s.sectionHeading}>Speed</div>
                  {bc.speed.concurrencies.length > 1 && <div style={s.benchChipRow}>
                      <span style={{
      fontSize: 11,
      opacity: 0.7
    }}>max-concurrency:</span>
                      {bc.speed.concurrencies.map(c => <button key={c} style={{
      ...s.benchChip,
      ...c === selConc ? s.benchChipActive : {}
    }} onClick={() => setBenchConc(c)}>
                          {c}
                        </button>)}
                    </div>}
                  <div style={{
      ...s.commandWrap,
      marginBottom: 6
    }}>
                    <div style={s.commandHeader}>
                      <div style={{
      fontSize: 11,
      opacity: 0.7
    }}>max-concurrency = {selConc}</div>
                      <button style={s.iconButton} onClick={() => copyBench("speed", speedCmd)}>
                        {benchCopied === "speed" ? "✓ Copied" : "⧉ Copy"}
                      </button>
                    </div>
                    <pre style={s.commandPre}>{speedCmd}</pre>
                  </div>
                  <p style={{
      fontSize: 11,
      opacity: 0.7,
      margin: "0 0 4px"
    }}>
                    One command — switch the concurrency chip (or edit <code>--max-concurrency</code>) to reproduce each Speed column.
                  </p>
                </div>}

              <p style={{
      fontSize: 11,
      opacity: 0.7,
      marginTop: 12
    }}>
                Edit <code>CURL_HOST</code> / <code>CURL_PORT</code> in the Env panel.
              </p>
            </div>
          </div>;
  })()}
    </div>;
};

## Deployment

<a id="install" />

<Accordion title="Install SGLang">
  For all methods and hardware platforms, see the [official SGLang installation guide](../../../docs/get-started/install). The two paths below match the **Python / Docker** toggle in the command panel.

  <Tabs>
    <Tab title="Python (pip / uv)">
      ```bash Command theme={null}
      pip install --upgrade pip
      pip install uv
      uv pip install sglang
      ```

      Then run the **Python** output of the command panel below in that environment.
    </Tab>

    <Tab title="Docker">
      A single image — `lmsysorg/sglang:latest` — covers the **datacenter GPUs** in this cookbook (B200 / B300 / GB200 / GB300 / H100 / H200). For **RTX PRO 6000 (SM120)**, use the nightly `lmsysorg/sglang:dev` instead — SM120 support isn't in `:latest` yet (see the RTX PRO 6000 note below).

      ```bash Command theme={null}
      docker pull lmsysorg/sglang:latest
      ```

      For how to launch the image, see [Install → Method 3: Using Docker](../../../docs/get-started/install#method-3-using-docker). A minimal example (substitute the inner `sglang serve ...` with whatever the command generator below produces):

      ```bash Command theme={null}
      docker run --gpus all \
          --shm-size 32g \
          -p 30000:30000 \
          -v ~/.cache/huggingface:/root/.cache/huggingface \
          --env "HF_TOKEN=<your-hf-token>" \
          --ipc=host \
          lmsysorg/sglang:latest \
          sglang serve <use args below>
      ```
    </Tab>
  </Tabs>
</Accordion>

Pick your hardware + recipe to generate the launch command. The three serving strategies cover the common operating points:

* **Low-Latency** — fastest reply for a single user. Pick for chat.
* **Balanced** — good speed with several users at once. Use for typical multi-user serving.
* **High-Throughput** — most tokens per second across many users. Best for batch jobs.

<Deployment config={config} benchmarks={benchmarks} />

<div style={{fontSize: "0.85em", lineHeight: "1.55", color: "#6b7280", margin: "0.5rem 0 1rem 0"}}>
  <p style={{margin: "0 0 0.3rem 0"}}><strong>Panel controls</strong> (top of the command box):</p>

  <ul style={{margin: 0, paddingLeft: "1.25rem"}}>
    <li style={{marginBottom: "0.2rem"}}><strong>Python / Docker</strong> — bare <code>sglang serve …</code> for an existing SGLang env, or a <code>docker run … sglang serve …</code> wrap against the per-hardware image from the <a href="#install">Install SGLang</a> panel above.</li>
    <li style={{marginBottom: "0.2rem"}}><strong>⧉ Copy</strong> — copies the current command (with whichever framing is active) to your clipboard.</li>
    <li style={{marginBottom: "0.2rem"}}><strong>\$ cURL</strong> — a sample request against <code>localhost:30000</code> to confirm the server is up.</li>
    <li style={{marginBottom: "0.2rem"}}><strong>⚙ Env</strong> — edits the placeholders (<code>HOST\_IP</code>, <code>PORT</code>, <code>HF\_TOKEN</code>, <code>NODE\_RANK</code>, <code>NODE0\_IP</code>) the command and cURL share. Persists in localStorage across cookbooks.</li>
    <li><strong>Verified / Not Verified</strong> badge — green when the <code>(hw, variant, quant, strategy, nodes)</code> combo has been run end-to-end on real hardware; yellow when auto-derived from a neighbor and not yet re-checked.</li>
  </ul>
</div>

## Playground

The Playground is where you experiment with **SGLang features beyond the verified matrix**. The Deploy panel above only emits combinations the SGLang team has signed off on; the Playground lets you turn on additional knobs on top of whichever cell the Deploy panel is currently showing. The base is read live from your Deploy selection — only your overrides change.

The knobs come in two flavors:

* **Built-in SGLang features** — parallelism overrides (TP / CP / DP-Attention — DP-Attention's value is the DP degree, with `off` to disable), MoE backend + EP, reasoning / tool-call parsers, speculative-decoding presets, prefill/decode disaggregation, HiCache tiers, and HiSparse hierarchical sparse attention (decode-role only — the card appears once PD-Disagg mode is set to decode).
* **DeepSeek-V4 specific features** — MegaMoE W4A8 / W4A4 fused kernel (Blackwell only).

Lines highlighted **green** are added by your overrides; lines with **red strikethrough** were in the verified base but stripped by an override. When no override differs from the base cell, the playground inherits the base's **Verified** badge; any actual change flips it to **Not Verified** until the new configuration is run end-to-end and submitted back.

<Playground config={config} />

<div style={{fontSize: "0.85em", lineHeight: "1.55", color: "#6b7280", margin: "0.5rem 0 1rem 0"}}>
  <p style={{margin: "0 0 0.3rem 0"}}><strong>Panel controls</strong> reuse <strong>Python / Docker</strong> · <strong>⧉ Copy</strong> · <strong>\$ cURL</strong> · <strong>⚙ Env</strong> from the Deploy panel, plus one extra:</p>

  <ul style={{margin: 0, paddingLeft: "1.25rem"}}>
    <li><strong>Submit ↗</strong> — opens a pre-filled GitHub issue so you can land your override combo as a new verified cookbook cell. Shown only while the badge says <strong>Not Verified</strong>; click it once you've actually run the command on your hardware and confirmed it works.</li>
  </ul>
</div>

## 1. Model Introduction

**DeepSeek-V4** is the next-generation Mixture-of-Experts model from DeepSeek, released 2026-04-24 under an **MIT License**. It ships as two Instruct repos (one per variant) plus matching Base repos:

<table style={{width: "100%", borderCollapse: "collapse", tableLayout: "fixed"}}>
  <colgroup>
    <col style={{width: "30%"}} />

    <col style={{width: "15%"}} />

    <col style={{width: "15%"}} />

    <col style={{width: "40%"}} />
  </colgroup>

  <thead>
    <tr style={{borderBottom: "2px solid #d55816"}}>
      <th style={{textAlign: "left", padding: "10px 12px", fontWeight: 700, whiteSpace: "nowrap", backgroundColor: "rgba(255,255,255,0.02)"}}>Variant</th>
      <th style={{textAlign: "right", padding: "10px 12px", fontWeight: 700, whiteSpace: "nowrap", backgroundColor: "rgba(255,255,255,0.05)"}}>Total params</th>
      <th style={{textAlign: "right", padding: "10px 12px", fontWeight: 700, whiteSpace: "nowrap", backgroundColor: "rgba(255,255,255,0.02)"}}>Active (MoE)</th>
      <th style={{textAlign: "left", padding: "10px 12px", fontWeight: 700, whiteSpace: "nowrap", backgroundColor: "rgba(255,255,255,0.05)"}}>Use</th>
    </tr>
  </thead>

  <tbody>
    <tr>
      <td style={{padding: "9px 12px", fontWeight: 500, backgroundColor: "rgba(255,255,255,0.02)"}}><strong><a href="https://huggingface.co/deepseek-ai/DeepSeek-V4-Flash">DeepSeek-V4-Flash</a></strong></td>
      <td style={{padding: "9px 12px", textAlign: "right", backgroundColor: "rgba(255,255,255,0.05)"}}><strong>284B</strong></td>
      <td style={{padding: "9px 12px", textAlign: "right", backgroundColor: "rgba(255,255,255,0.02)"}}>13B</td>
      <td style={{padding: "9px 12px", backgroundColor: "rgba(255,255,255,0.05)"}}>single-node serving on B200 / B300 / GB200 / GB300 / H200 (TP=4); H100 (TP=8)</td>
    </tr>

    <tr>
      <td style={{padding: "9px 12px", fontWeight: 500, backgroundColor: "rgba(255,255,255,0.02)"}}><strong><a href="https://huggingface.co/deepseek-ai/DeepSeek-V4-Pro">DeepSeek-V4-Pro</a></strong></td>
      <td style={{padding: "9px 12px", textAlign: "right", backgroundColor: "rgba(255,255,255,0.05)"}}><strong>1.6T</strong></td>
      <td style={{padding: "9px 12px", textAlign: "right", backgroundColor: "rgba(255,255,255,0.02)"}}>49B</td>
      <td style={{padding: "9px 12px", backgroundColor: "rgba(255,255,255,0.05)"}}>high-capacity: B200 / B300 (TP=8) · GB300 (TP=4) · H200 FP4 (TP=8) · GB200 (2-node, TP=8) · H200 FP8 (2-node, TP=16) · H100 (2-node, TP=16)</td>
    </tr>
  </tbody>
</table>

Both Instruct repos ship as **FP4 MoE experts + FP8 attention / dense** (one mixed-precision checkpoint covers every FP4-capable GPU). Matching `*-Base` repos ship pure FP8 mixed and are for further pre-training only — not for chat or tool calling.

**Highlights:** hybrid CSA + HCA attention (\~27% inference FLOPs / \~10% KV cache vs DSv3.2 at 1M context), manifold-constrained hyper-connections (mHC), Muon optimizer, **1M-token context** (32T+ pre-training tokens), three reasoning modes (*Non-think* / *Think High* / *Think Max* — use ≥ 384K context for Think Max), and a dedicated `encoding_dsv4.encode_messages` Python encoder + DSML tool-call grammar.

**Recommended generation:** `temperature=1.0`, `top_p=1.0`.

**Resources:** HuggingFace · [Flash](https://huggingface.co/deepseek-ai/DeepSeek-V4-Flash) · [Pro](https://huggingface.co/deepseek-ai/DeepSeek-V4-Pro)  ·  ModelScope · [Flash](https://modelscope.cn/models/deepseek-ai/DeepSeek-V4-Flash) · [Pro](https://modelscope.cn/models/deepseek-ai/DeepSeek-V4-Pro).

## 2. Configuration Tips

**Concurrency & DeepEP dispatch buffer**

Must hold: `max-running-requests × MTP_draft_tokens ≤ SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK`. Violating this overflows DeepEP's dispatch buffer at steady-state load (`deep_ep.cpp:1105`). When tuning, change `--cuda-graph-max-bs`, `--max-running-requests`, and the `SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK` env var together.

The generator currently picks values on the **conservative** side (mirroring an internal stress-test matrix). They run safely out of the box but likely leave throughput on the table — please tune them up toward your actual workload's peak concurrency and report findings back so the defaults can be revised.

**MTP (Multi-Token Prediction, EAGLE)**

* `low-latency`: steps=3, draft-tokens=4 → largest win at bs=1.
* `balanced`: steps=1, draft-tokens=2 → gentler MTP, reduces throughput hit at higher batch.
* `high-throughput`: MTP disabled — at saturation the verify step costs more than it saves.
* MTP runs on the v2 speculative path.

**EPLB + DeepEP Waterfill (Experimental)**

For recorded/static EPLB reproduction, first record an expert-distribution file by following
[Capture expert selection distribution in MoE models](../../../docs/basic_usage/native_api#capture-expert-selection-distribution-in-moe-models).
For reproduction runs, use the generated `expert_distribution_recorder_*.pt` as
the initial expert location. **Please check out the latest main branch for this feature.**

For non-PD reproduction, use:

```bash Command theme={null}
--moe-a2a-backend deepep \
--deepep-mode auto \
--init-expert-location /path/to/expert_distribution_recorder_*.pt \
--enable-deepep-waterfill
```

For PD-Disagg reproduction, use `normal` mode on the prefill server and
`low_latency` mode on the decode server. Add the same `--init-expert-location`
flag to both commands:

```bash Command theme={null}
# prefill
--moe-a2a-backend deepep \
--deepep-mode normal \
--init-expert-location /path/to/expert_distribution_recorder_*.pt \
--enable-deepep-waterfill

# decode
--moe-a2a-backend deepep \
--deepep-mode low_latency \
--init-expert-location /path/to/expert_distribution_recorder_*.pt \
--enable-deepep-waterfill
```

You can also add `--ep-num-redundant-experts` and `--eplb-algorithm` to customize
EPLB placement.

MegaMoE is not supported with this DeepEP Waterfill recipe yet. Waterfill routes
the shared expert through DeepEP for load balancing, so `--enable-deepep-waterfill`
requires `--moe-a2a-backend deepep`.

**FP4 Indexer (Experimental)**

DeepSeek-V4 uses the default indexer path unless `--enable-deepseek-v4-fp4-indexer` is set. Enable this flag to use the experimental FP4 C4 indexer on SM100 GPUs with DeepGEMM FP4 indexer support. This path is intended for decode-heavy long-context workloads where reducing indexer cache bandwidth is beneficial.

```bash Command theme={null}
# Please use the latest main branch for this feature.
sglang serve \
  --model-path deepseek-ai/DeepSeek-V4-Flash \
  --tp 4 \
  --moe-runner-backend flashinfer_mxfp4 \
  --enable-deepseek-v4-fp4-indexer
```

<a id="hopper-note" />

**Hopper (H100 / H200) note**

Two options are available for running DeepSeek-V4 on Hopper:

* **Original FP4 checkpoints** — run with the W4A16 MoE kernels (Marlin), which is what the command generator picks for Hopper cells. This path works on both H100 and H200 and is the only option for H100 (no FP8 path). It is TP-only; on H200 the Pro variant fits on a single 8-GPU node, while H100 Pro needs 2 nodes (TP=16).
* **Converted FP8 checkpoints** (H200 only) — pre-repackaged FP8 weights at [`sgl-project/DeepSeek-V4-Flash-FP8`](https://huggingface.co/sgl-project/DeepSeek-V4-Flash-FP8) and [`sgl-project/DeepSeek-V4-Pro-FP8`](https://huggingface.co/sgl-project/DeepSeek-V4-Pro-FP8) unlock DP-attention + DeepEP and richer parallelism (e.g. Pro TP=16 across 2 nodes).

PD-Disagg recipes on H200 may require `docker run --privileged --ulimit memlock=-1`
(or `--device /dev/infiniband:/dev/infiniband --cap-add IPC_LOCK`) so mooncake
can discover the IB HCAs; without IB exposure mooncake silently falls back to
TCP, which can lead to garbled KV transfer on large checkpoints.

**RTX PRO 6000 (SM120 / Blackwell Desktop) note**

RTX PRO 6000 (96 GB) runs **Flash only** — V4-Pro doesn't fit on 8× 96 GB. It uses the
**low-latency / TP-only** recipe (TP=4, single node) with the **Marlin** W4A16 MoE runner and
`--mem-fraction-static 0.70`; the Deploy panel greys out the other recipes for this card.
HiCache and MegaMoE are **not** supported on RTX PRO 6000. For Docker, use the nightly `lmsysorg/sglang:dev` image — SM120 support isn't in `lmsysorg/sglang:latest` yet (the Deploy panel's Docker mode already points this card at `:dev`).

**MegaMoE**

MegaMoE fuses expert dispatch + GEMM into a single kernel for higher throughput
on MoE layers. To enable it, use the **MegaMoE** chip in the
[Playground above](#playground) — the playground will swap `--moe-a2a-backend deepep` for
`--moe-a2a-backend megamoe` and add the relevant env vars automatically.

Two variants are exposed:

* **W4A8** — default MegaMoE kernel (FP4 weights, FP8 activations).
* **W4A4** — adds `SGLANG_OPT_DEEPGEMM_MEGA_MOE_USE_FP4_ACTS=1` and
  `SGLANG_OPT_DEEPGEMM_MEGA_MOE_USE_MXF4_KIND=1` to run the custom W4A4
  kernel (FP4 activations). Higher throughput with negligible accuracy drop
  (\~89.5 GPQA on Pro).

Notes:

* MegaMoE is **only supported on datacenter Blackwell GPUs** (B200 / B300 / GB200 / GB300). The chip is hidden when the Deploy panel's base cell sits on Hopper (H100 / H200).
* MegaMoE is **only wired into the `high-throughput` recipe** on Blackwell (per [sgl-project/sglang#26451](https://github.com/sgl-project/sglang/pull/26451)). The chip is hidden on `low-latency` and `balanced` — switch to `high-throughput` to expose it.
* When running MegaMoE, don't set `--moe-runner-backend` manually.
* Adjust `SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK` based on your workload and memory usage. Setting a higher number of tokens for MegaMoE requires more HBM space (recommended: 8320 for high-throughput).

**GB300 PD-Disagg cross-pod MNNVL**

On some GB300 clusters with cross-pod KV transfer over NVLink, mooncake may
fail with `nvlink_transport.cpp:497 Requested address ... not found!`. If
this happens, prepend `MC_FORCE_MNNVL=1 NCCL_MNNVL_ENABLE=1 NCCL_CUMEM_ENABLE=1`
to both prefill and decode `sglang serve` commands.

## 3. Advanced Usage

### 3.1 Reasoning

Enable the `deepseek-v4` reasoning parser (toggle **Reasoning Parser** in the **Parsers** card of the [Playground above](#playground)) to separate thinking from the final answer into `reasoning_content` vs `content`.

<Accordion title="Streaming with Thinking Process (Python)">
  ```python Example theme={null}
  # Streams one chat completion with thinking enabled and prints the reasoning
  # text and the final answer under separate banners.
  from openai import OpenAI

  client = OpenAI(
      base_url="http://localhost:30000/v1",
      api_key="EMPTY"
  )

  # "thinking": True is passed via chat_template_kwargs in extra_body;
  # stream=True makes the response an iterable of chunks.
  response = client.chat.completions.create(
      model="deepseek-ai/DeepSeek-V4-Flash",
      messages=[
          {"role": "user", "content": "Solve this problem step by step: What is 15% of 240?"}
      ],
      max_tokens=2048,
      extra_body={"chat_template_kwargs": {"thinking": True}},
      stream=True,
  )

  # thinking_started: the "Thinking" banner has been printed.
  # has_thinking: at least one reasoning_content delta was seen.
  # has_answer: the "Content" banner has been printed.
  thinking_started = False
  has_thinking = False
  has_answer = False

  for chunk in response:
      # Skip chunks that carry no choices.
      if not chunk.choices:
          continue
      delta = chunk.choices[0].delta

      # reasoning_content may be absent on a delta, hence getattr with a default.
      if getattr(delta, "reasoning_content", None):
          if not thinking_started:
              print("=============== Thinking =================", flush=True)
              thinking_started = True
          has_thinking = True
          print(delta.reasoning_content, end="", flush=True)

      if delta.content:
          # Print the "Content" banner once, and only if reasoning preceded it.
          if has_thinking and not has_answer:
              print("\n=============== Content =================", flush=True)
              has_answer = True
          print(delta.content, end="", flush=True)

  # Terminate the last streamed line.
  print()
  ```
</Accordion>

<Accordion title="Example Output">
  ```text Output theme={null}
  We are asked: "What is 15% of 240?" This is a simple percentage problem. I need to provide a step-by-step solution. The user wants the solution explained step by step. I'll calculate 15% of 240: 0.15 * 240 = 36. I'll break it down into steps: understand what percent means, convert percentage to decimal or fraction, then multiply. I'll present the answer clearly.</think>To find 15% of 240, follow these steps:

  **Step 1: Understand the meaning of percent**
  "Percent" means "per hundred," so 15% means 15 out of every100, or \( \frac{15}{100} \).

  **Step2: Convert the percentage to a decimal or fraction**
  \( 15\% = \frac{15}{100} = 0.15 \)

  **Step3: Multiply by the given number**
  Multiply the decimal form by 240:
  \( 0.15 \times 240 \)

  **Step4: Perform the multiplication**
  \( 0.15 \times 240 = 36 \)

  **Answer:** 15% of 240 is **36**.
  ```
</Accordion>

### 3.2 Tool Calling

Enable the `deepseekv4` tool-call parser (toggle **Tool Call Parser** in the **Parsers** card of the [Playground above](#playground)) to surface structured tool calls via `message.tool_calls`.

<Accordion title="Python Example with Thinking Process">
  ```python Example theme={null}
  # Streams one chat completion with a single tool definition and thinking
  # enabled, prints reasoning/content as it arrives, and prints the accumulated
  # tool calls at the end.
  from openai import OpenAI

  client = OpenAI(
      base_url="http://localhost:30000/v1",
      api_key="EMPTY"
  )

  # One function tool: get_weather(location, unit); only "location" is required.
  tools = [
      {
          "type": "function",
          "function": {
              "name": "get_weather",
              "description": "Get the current weather for a location",
              "parameters": {
                  "type": "object",
                  "properties": {
                      "location": {"type": "string", "description": "The city name"},
                      "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                  },
                  "required": ["location"],
              },
          },
      }
  ]

  response = client.chat.completions.create(
      model="deepseek-ai/DeepSeek-V4-Flash",
      messages=[{"role": "user", "content": "What's the weather in Beijing?"}],
      tools=tools,
      extra_body={"chat_template_kwargs": {"thinking": True}},
      stream=True,
  )

  # thinking_started: the "Thinking" banner is currently open.
  # has_thinking: at least one reasoning_content delta was seen.
  # tool_calls_accumulator: maps tool-call index -> {"name", "arguments"}.
  thinking_started = False
  has_thinking = False
  tool_calls_accumulator = {}

  for chunk in response:
      # Skip chunks that carry no choices.
      if not chunk.choices:
          continue
      delta = chunk.choices[0].delta

      if getattr(delta, "reasoning_content", None):
          if not thinking_started:
              print("=============== Thinking =================", flush=True)
              thinking_started = True
          has_thinking = True
          print(delta.reasoning_content, end="", flush=True)

      if getattr(delta, "tool_calls", None):
          # Close the thinking section once; resetting thinking_started keeps the
          # banner from being printed again on later tool-call deltas.
          if has_thinking and thinking_started:
              print("\n=============== Content =================\n", flush=True)
              thinking_started = False
          # Merge each fragment into its slot: the name is overwritten when
          # present, argument text is concatenated in arrival order.
          for tool_call in delta.tool_calls:
              index = tool_call.index
              if index not in tool_calls_accumulator:
                  tool_calls_accumulator[index] = {"name": None, "arguments": ""}
              if tool_call.function:
                  if tool_call.function.name:
                      tool_calls_accumulator[index]["name"] = tool_call.function.name
                  if tool_call.function.arguments:
                      tool_calls_accumulator[index]["arguments"] += tool_call.function.arguments

      if delta.content:
          print(delta.content, end="", flush=True)

  # Report the assembled tool calls in index order.
  for index, tool_call in sorted(tool_calls_accumulator.items()):
      print(f"Tool Call: {tool_call['name']}")
      print(f"   Arguments: {tool_call['arguments']}")

  print()
  ```
</Accordion>

<Accordion title="Example Output">
  ```text Output theme={null}
  The user wants to know the weather in Beijing. I'll use the get_weather function with Beijing as the location. I don't need to specify a unit, so I'll just use the default.</think>

  <｜DSML｜tool_calls>
  <｜DSML｜invoke name="get_weather">
  <｜DSML｜parameter name="location" string="true">Beijing</｜DSML｜parameter>
  </｜DSML｜invoke>
  </｜DSML｜tool_calls>
  ```
</Accordion>

### 3.3 HiCache (Hierarchical KV Caching)

HiCache enables multi-tier KV cache offloading (GPU → CPU → Storage), significantly expanding effective context capacity for long-context and multi-turn scenarios. Combined with UnifiedRadixTree, it provides intelligent prefix caching across all tiers.

To enable HiCache, open the **HiCache** card in the [Playground above](#playground) and flip **Enable**:

* **L2 (GPU + CPU)** — leave Storage on `auto` (default). Cold KV pages spill to CPU pinned memory only.
* **L3 (GPU + CPU + Storage)** — pick a Storage backend (`file` / `mooncake` / `hf3fs` / `nixl`); the Playground emits the canonical `page_first_direct` mem-layout + `direct` IO backend + `wait_complete` prefetch policy, matching the [HiCache best-practices recipe](../../../docs/advanced_features/hicache_best_practices).

The Write policy knob defaults to `write_through` (the upstream default); switch to `write_back` / `write_through_selective` to trade durability for write speed when the storage tier is slow.

For more details, see the [HiCache documentation](../../../docs/advanced_features/hicache).
