> ## Documentation Index
> Fetch the complete documentation index at: https://docs.sglang.io/llms.txt
> Use this file to discover all available pages before exploring further.

# SpecBundle Usage

export const SpecBundleDeployment = () => {
  // Declarative description of the generator form. Each section is either a
  // `radio` group or a list of `inputs`; a section carrying `requiredMode`
  // is only shown while that launch mode is selected.

  // Radio group stored under the `mode` key of the form state.
  const launchModeOption = {
    name: 'mode',
    title: 'Launch Mode',
    renderType: 'radio',
    items: [
      {
        id: 'with-server',
        label: 'With Server',
        subtitle: 'Launch SGLang server & Benchmark concurrently',
        default: true
      },
      {
        id: 'without-server',
        label: 'Without Server',
        subtitle: 'Connect to an existing server (--skip-launch-server)',
        default: false
      }
    ]
  };

  // Inputs that apply to every launch mode.
  const commonOption = {
    name: 'common',
    title: 'Common Configuration',
    renderType: 'inputs',
    items: [
      {
        id: 'modelPath',
        label: 'Model Path',
        type: 'text',
        placeholder: 'e.g., meta-llama/Llama-3.1-8B-Instruct',
        default: 'meta-llama/Llama-3.1-8B-Instruct',
        description: 'Path to the target model.'
      },
      {
        id: 'port',
        label: 'Port',
        type: 'number',
        default: 30000,
        description: 'Port to launch/connect the SGLang server.'
      },
      {
        id: 'configList',
        label: 'Config List',
        type: 'text',
        default: '1,3,1,4',
        description: 'Format: <batch-size>,<num-steps>,<topk>,<num-draft-tokens>'
      },
      {
        id: 'benchmarkList',
        label: 'Benchmark List',
        type: 'textarea',
        default: 'mtbench:5 ceval:5:accountant',
        description: 'Format: <benchmark-name>:<num-prompts>:<subset>. Supported: aime, ceval, financeqa, gpqa, gsm8k, humaneval, livecodebench, math500, mmlu, mmstar, mtbench, simpleqa'
      }
    ]
  };

  // Inputs that only matter when the script launches the server itself.
  const serverOption = {
    name: 'server',
    title: 'Server Configuration',
    renderType: 'inputs',
    requiredMode: 'with-server',
    items: [
      {
        id: 'draftModelPath',
        label: 'Draft Model Path',
        type: 'text',
        placeholder: 'Path to draft model',
        default: '',
        description: 'Path to the speculative draft model.'
      },
      {
        id: 'tpSize',
        label: 'TP Size',
        type: 'number',
        default: 1,
        description: 'Number of GPUs for Tensor Parallelism.'
      },
      {
        id: 'memFraction',
        label: 'Memory Fraction Static',
        type: 'number',
        step: '0.1',
        default: 0.9,
        description: 'The memory fraction for the static memory.'
      },
      {
        id: 'attentionBackend',
        label: 'Attention Backend',
        type: 'text',
        default: '',
        description: 'The attention backend used in sglang'
      },
      {
        id: 'trustRemoteCode',
        label: 'Trust Remote Code',
        type: 'checkbox',
        default: true,
        description: 'Whether to trust remote code.'
      }
    ]
  };

  // Section order here is the order in which the sections are rendered.
  const baseConfig = {
    options: {
      mode: launchModeOption,
      common: commonOption,
      server: serverOption
    }
  };
  // Derives the starting form state from `baseConfig`:
  //  - a `radio` section contributes one entry, keyed by the section name,
  //    holding the id of its default item (or of its first item when none is
  //    flagged as default);
  //  - an `inputs` section contributes one entry per item, keyed by item id,
  //    holding that item's `default`.
  // Sections with any other render type contribute nothing.
  const getInitialState = () => {
    const seed = {};
    for (const section of Object.values(baseConfig.options)) {
      switch (section.renderType) {
        case 'radio': {
          const preselected = section.items.find(entry => entry.default);
          seed[section.name] = (preselected || section.items[0]).id;
          break;
        }
        case 'inputs':
          for (const field of section.items) {
            seed[field.id] = field.default;
          }
          break;
        default:
          break;
      }
    }
    return seed;
  };
  // Form values keyed by option name / item id; seeded from getInitialState.
  const [values, setValues] = useState(getInitialState);
  // Tracks whether the page is currently displayed with a dark theme.
  const [isDark, setIsDark] = useState(false);
  // Runs once after mount: reads the theme from the <html> element and keeps
  // `isDark` in sync whenever its class, data-theme or style attribute changes.
  useEffect(() => {
    const syncTheme = () => {
      const root = document.documentElement;
      // Any one of these markers is enough to treat the page as dark.
      const darkSignals = [
        root.classList.contains('dark'),
        root.getAttribute('data-theme') === 'dark',
        root.style.colorScheme === 'dark'
      ];
      setIsDark(darkSignals.some(Boolean));
    };
    syncTheme();
    const themeWatcher = new MutationObserver(syncTheme);
    themeWatcher.observe(document.documentElement, {
      attributes: true,
      attributeFilter: ['class', 'data-theme', 'style']
    });
    // Stop observing when the component unmounts.
    return () => themeWatcher.disconnect();
  }, []);
  // Returns the sections of `baseConfig.options` that apply to the currently
  // selected launch mode, keyed as in the config and in the same order.
  // A section without `requiredMode` is always included.
  const getDisplayOptions = () => {
    const activeMode = values.mode;
    const visible = {};
    for (const [sectionKey, section] of Object.entries(baseConfig.options)) {
      const isVisible = !section.requiredMode || section.requiredMode === activeMode;
      if (isVisible) {
        visible[sectionKey] = section;
      }
    }
    return visible;
  };
  // Stores `itemId` as the selected item of the radio group `optionName`,
  // leaving every other form value untouched.
  const handleRadioChange = (optionName, itemId) => {
    const selection = {[optionName]: itemId};
    setValues(previous => ({...previous, ...selection}));
  };
  // Stores the new `value` of the text/number/textarea input `itemId`,
  // producing a fresh state object rather than mutating the previous one.
  const handleInputChange = (itemId, value) => {
    const storeInput = current => {
      return {...current, [itemId]: value};
    };
    setValues(storeInput);
  };
  // Stores the `checked` flag of the checkbox `itemId` in the form state.
  const handleCheckboxChange = (itemId, checked) => {
    setValues(snapshot => {
      const toggled = {[itemId]: checked};
      return {...snapshot, ...toggled};
    });
  };
  // Builds the multi-line `python bench_eagle3.py ...` command shown in the
  // "Generated Command" card from the current form `values`.
  //
  // Each option goes on its own continuation line (" \" + newline + two
  // spaces). Empty values are skipped. Surrounding whitespace is trimmed and
  // whitespace-only values count as empty, so a field holding only blanks
  // never yields a dangling flag with no argument. Values are inserted as-is,
  // without shell quoting.
  //
  // In 'without-server' mode only `--skip-launch-server` is added after the
  // common options; otherwise the server options are appended.
  const generateCommand = () => {
    const {mode, modelPath, port, configList, benchmarkList, draftModelPath, tpSize, memFraction, attentionBackend, trustRemoteCode} = values;
    const segments = ['python bench_eagle3.py'];

    // Adds "<flag> <value>" unless the value is empty once trimmed.
    const appendOption = (flag, value) => {
      if (!value) return;
      const text = String(value).trim();
      if (text) segments.push(`${flag} ${text}`);
    };

    // The benchmark list comes from a textarea: treat any run of whitespace
    // (newlines included) as one separator so blank or trailing lines do not
    // leave stray spaces in the command.
    const benchmarks = benchmarkList ? String(benchmarkList).split(/\s+/).filter(Boolean).join(' ') : '';

    appendOption('--model-path', modelPath);
    appendOption('--port', port);
    appendOption('--config-list', configList);
    appendOption('--benchmark-list', benchmarks);

    if (mode === 'without-server') {
      segments.push('--skip-launch-server');
    } else {
      appendOption('--speculative-draft-model-path', draftModelPath);
      appendOption('--tp-size', tpSize);
      appendOption('--mem-fraction-static', memFraction);
      appendOption('--attention-backend', attentionBackend);
      if (trustRemoteCode) segments.push('--trust-remote-code');
    }
    return segments.join(' \\\n  ');
  };
  // Inline style objects for the widget. Every theme-dependent value goes
  // through `themed`, which picks the dark or light variant based on `isDark`.
  const themed = (darkValue, lightValue) => (isDark ? darkValue : lightValue);
  const foreground = themed('#e5e7eb', 'inherit');
  const frameColor = themed('#374151', '#e5e7eb');
  const controlSurface = themed('#374151', '#fff');

  // Outer wrapper: centered column holding one card per row.
  const containerStyle = {
    maxWidth: '900px',
    margin: '0 auto',
    display: 'flex',
    flexDirection: 'column',
    gap: '4px'
  };

  // One row of the form: accent stripe on the left, title next to content.
  // `borderLeft` comes after `border` so the accent overrides the left edge.
  const cardStyle = {
    padding: '8px 12px',
    border: `1px solid ${frameColor}`,
    borderLeft: `3px solid ${themed('#E85D4D', '#D45D44')}`,
    borderRadius: '4px',
    display: 'flex',
    alignItems: 'flex-start',
    gap: '12px',
    background: themed('#1f2937', '#fff')
  };

  // Fixed-width title column of a card.
  const titleStyle = {
    fontSize: '13px',
    fontWeight: '600',
    minWidth: '180px',
    flexShrink: 0,
    color: foreground,
    paddingTop: '4px'
  };

  // Content column that takes the remaining width.
  const contentStyle = {flex: 1};

  // Wrapping row of radio "buttons".
  const itemsStyle = {
    display: 'flex',
    rowGap: '4px',
    columnGap: '6px',
    flexWrap: 'wrap',
    alignItems: 'center'
  };

  // Unselected radio label, drawn as a small button.
  const labelBaseStyle = {
    padding: '4px 10px',
    border: `1px solid ${themed('#9ca3af', '#d1d5db')}`,
    borderRadius: '3px',
    cursor: 'pointer',
    display: 'inline-flex',
    flexDirection: 'column',
    alignItems: 'center',
    justifyContent: 'center',
    fontWeight: '500',
    fontSize: '13px',
    transition: 'all 0.2s',
    userSelect: 'none',
    minWidth: '45px',
    textAlign: 'center',
    background: controlSurface,
    color: foreground
  };

  // Overrides merged onto labelBaseStyle for the selected radio label.
  const checkedStyle = {
    background: '#D45D44',
    color: 'white',
    borderColor: '#D45D44'
  };

  // Small secondary line under a radio label.
  const subtitleStyle = {
    display: 'block',
    fontSize: '9px',
    marginTop: '1px',
    lineHeight: '1.1',
    opacity: 0.7
  };

  const inputGroupStyle = {
    display: 'flex',
    flexDirection: 'column',
    gap: '8px'
  };

  const inputRowStyle = {
    display: 'flex',
    alignItems: 'flex-start',
    gap: '12px'
  };

  // Label column of an input card.
  const inputLabelStyle = {
    fontSize: '13px',
    fontWeight: '500',
    minWidth: '180px',
    flexShrink: 0,
    color: foreground,
    paddingTop: '8px'
  };

  // Column holding the control with its description underneath.
  const inputContentStyle = {
    flex: 1,
    display: 'flex',
    flexDirection: 'column'
  };

  // Text and number inputs.
  const inputStyle = {
    padding: '8px 12px',
    border: `1px solid ${themed('#4b5563', '#d1d5db')}`,
    borderRadius: '4px',
    fontSize: '13px',
    background: controlSurface,
    color: foreground,
    width: '100%',
    boxSizing: 'border-box'
  };

  // Textareas look like inputs but are taller and vertically resizable.
  const textareaStyle = {...inputStyle, minHeight: '60px', resize: 'vertical'};

  // Help text under a control.
  const descStyle = {
    color: themed('#9ca3af', '#666'),
    marginTop: '4px',
    fontSize: '11px'
  };

  // Monospace box showing the generated command.
  const commandDisplayStyle = {
    flex: 1,
    padding: '12px 16px',
    background: themed('#111827', '#f5f5f5'),
    borderRadius: '6px',
    fontFamily: "'Menlo', 'Monaco', 'Courier New', monospace",
    fontSize: '12px',
    lineHeight: '1.5',
    color: themed('#e5e7eb', '#374151'),
    whiteSpace: 'pre-wrap',
    overflowX: 'auto',
    margin: 0,
    border: `1px solid ${frameColor}`
  };
  // Render one block per section that applies to the selected launch mode,
  // followed by a card showing the command generated from the current values.
  const displayOptions = getDisplayOptions();
  return <div style={containerStyle} className="not-prose">
      {Object.entries(displayOptions).map(([key, option]) => <div key={key}>
          {/* Radio section: a single card whose items are rendered as selectable labels. */}
          {option.renderType === 'radio' && <div style={cardStyle}>
              <div style={titleStyle}>{option.title}</div>
              <div style={{
    ...contentStyle,
    ...itemsStyle
  }}>
                {option.items.map(item => {
    // The selected item gets `checkedStyle` merged over the base label style.
    const isChecked = values[option.name] === item.id;
    return <label key={item.id} style={{
      ...labelBaseStyle,
      ...isChecked ? checkedStyle : {}
    }}>
                      <input type="radio" name={option.name} value={item.id} checked={isChecked} onChange={() => handleRadioChange(option.name, item.id)} style={{
      // The native radio is not displayed; the enclosing <label> is the visible control.
      display: 'none'
    }} />
                      {item.label}
                      {item.subtitle && <small style={{
      ...subtitleStyle,
      color: isChecked ? 'rgba(255,255,255,0.85)' : 'inherit'
    }}>{item.subtitle}</small>}
                    </label>;
  })}
              </div>
            </div>}

          {/* Inputs section: one card per item; the control is a textarea, a checkbox, or an <input> of the item's type. */}
          {option.renderType === 'inputs' && <div style={{
    display: 'flex',
    flexDirection: 'column',
    gap: '4px'
  }}>
              {option.items.map(item => <div key={item.id} style={cardStyle}>
                  <div style={inputLabelStyle}>{item.label}</div>
                  <div style={inputContentStyle}>
                    {item.type === 'textarea' ? <textarea value={values[item.id]} onChange={e => handleInputChange(item.id, e.target.value)} style={textareaStyle} /> : item.type === 'checkbox' ? <label style={{
    display: 'flex',
    alignItems: 'center',
    cursor: 'pointer',
    gap: '8px',
    padding: '4px 0'
  }}>
                        <input type="checkbox" checked={values[item.id]} onChange={e => handleCheckboxChange(item.id, e.target.checked)} style={{
    width: '16px',
    height: '16px',
    cursor: 'pointer'
  }} />
                        <span style={{
    fontSize: '13px',
    color: isDark ? '#e5e7eb' : 'inherit'
  }}>Enabled</span>
                      </label> : <input type={item.type} value={values[item.id]} placeholder={item.placeholder} step={item.step} onChange={e => handleInputChange(item.id, e.target.value)} style={inputStyle} />}
                    {item.description && <small style={descStyle}>{item.description}</small>}
                  </div>
                </div>)}
            </div>}
        </div>)}

      <div style={cardStyle}>
        <div style={titleStyle}>Generated Command</div>
        <pre style={commandDisplayStyle}>{generateCommand()}</pre>
      </div>
    </div>;
};

<img src="https://mintcdn.com/lmsysorg/iZdDMbLWP1BLEIzC/logo/logo.png?fit=max&auto=format&n=iZdDMbLWP1BLEIzC&q=85&s=b27d467947ee2167744cb07b69badf00" alt="specbundle logo" width="2392" height="728" data-path="logo/logo.png" />

## About SpecBundle

Speculative decoding, especially EAGLE3, offers strong theoretical guarantees alongside consistent empirical improvements in token acceptance rate and end-to-end inference speed. However, despite these advances, adoption of speculative decoding—especially EAGLE3—remains limited in the open-source ecosystem, primarily due to three key factors.

1. Lack of production-ready training infrastructure: Existing speculative decoding toolchains are largely research prototypes, offering limited system-level optimization and inadequate support for diverse architectures and large-scale models.
2. Scarcity of high-quality draft models: Effective speculative decoding depends on strong draft models, yet publicly available EAGLE3-compatible checkpoints are extremely limited, primarily originating from the original authors.
3. Insufficient training scale of existing drafts: Most available draft models are trained on small or curated datasets and fail to generalize to the large, diverse corpora used in modern LLM training, resulting in low token acceptance rates and diminished practical speedups.

**SpecBundle** is a direct response to these limitations. Jointly driven by the open-source community and industry partners including **Ant Group**, **Meituan**, **Nex-AGI** and **EigenAI**, **SpecBundle** represents the **first open initiative** aimed at democratizing speculative decoding by providing high-performance, production-grade EAGLE3 draft model weights for mainstream open-source LLMs. This initiative also serves to verify the robustness of the [**SpecForge**](https://github.com/sgl-project/SpecForge) framework through multiple scales and architectures.

## Installation

```bash Command theme={null}
git clone https://github.com/sgl-project/SpecForge.git
```

## Usage

### Launch SGLang Server with SpecBundle models

You can use the following command to launch the SGLang server with SpecBundle models. Please add `--tp`, `--ep` and `--mem-fraction-static` arguments when you encounter memory issues.

```bash Command theme={null}
python3 -m sglang.launch_server \
    --model <target-model-path> \
    --speculative-algorithm EAGLE3 \
    --speculative-draft-model-path <draft-model-path> \
    --speculative-num-steps 3 \
    --speculative-eagle-topk 1 \
    --speculative-num-draft-tokens 4
```

For example:

```bash Command theme={null}
SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1 python3 -m sglang.launch_server \
    --model Qwen/Qwen3-30B-A3B-Instruct-2507 \
    --speculative-algorithm EAGLE3 \
    --speculative-draft-model-path lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge-Nex \
    --speculative-num-steps 3 \
    --speculative-eagle-topk 1 \
    --speculative-num-draft-tokens 4 \
    --tp 4
```

### Use SpecBundle to compare the performance of Speculative Decoding draft models

We provide a benchmark suite to evaluate the performance of SpecBundle draft models [here](https://github.com/sgl-project/SpecForge/tree/main/benchmarks).

#### Example:

1. Launch an SGLang Server

```bash Command theme={null}
SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1 python3 -m sglang.launch_server \
    --model Qwen/Qwen3-30B-A3B-Instruct-2507 \
    --speculative-algorithm EAGLE3 \
    --speculative-draft-model-path lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge-Nex \
    --speculative-num-steps 3 \
    --speculative-eagle-topk 1 \
    --speculative-num-draft-tokens 4 \
    --tp 4
```

2. Use the benchmark suite to evaluate the performance of SpecBundle draft models

`bench_eagle3.py` can launch an SGLang server process and a benchmarking process concurrently. This way, you don't have to launch the SGLang server manually: the script automatically handles launching SGLang under different speculative decoding configurations. Some important arguments are:

* `--model-path`: the path to the target model.
* `--speculative-draft-model-path`: the path to the draft model.
* `--port`: the port to launch the SGLang server.
* `--trust-remote-code`: trust the remote code.
* `--mem-fraction-static`: the memory fraction for the static memory.
* `--tp-size`: the tensor parallelism size.
* `--attention-backend`: the attention backend.
* `--config-list`: the list of speculative decoding configurations to test; the format is `<batch-size>,<num-steps>,<topk>,<num-draft-tokens>`.
* `--benchmark-list`: the list of benchmarks to test; the format is `<benchmark-name>:<num-prompts>:<subset>`.

```bash Command theme={null}
cd SpecForge/benchmarks
python bench_eagle3.py \
    --model-path Qwen/Qwen3-30B-A3B-Instruct-2507 \
    --port 30000 \
    --config-list 1,3,1,4 \
    --benchmark-list mtbench:5 gsm8k:100 \
    --skip-launch-server
```

**Interactive Command Generator**: Use the configuration selector below to automatically generate the appropriate test command for your model and benchmark.

<SpecBundleDeployment />

It will generate a JSON file whose content looks like the following:

```json Config theme={null}
{
  "mtbench": [
    {
      "batch_size": 1,
      "steps": null,
      "topk": null,
      "num_draft_tokens": null,
      "metrics": [
        {
          "latency": 12.232808108034078,
          "output_throughput": 319.71399906382845,
          "accept_length": 2.170366259711432,
          "accuracy": null,
          "num_questions": 5,
          "num_valid_predictions": 0,
          "categorical_performance": null
        }
      ],
      "num_samples": 5
    }
  ],
  "gsm8k": [
    {
      "batch_size": 1,
      "steps": null,
      "topk": null,
      "num_draft_tokens": null,
      "metrics": [
        {
          "latency": 37.42077191895805,
          "output_throughput": 373.6160234823207,
          "accept_length": 2.643410852713178,
          "accuracy": 0.96,
          "num_questions": 100,
          "num_valid_predictions": 100,
          "categorical_performance": null
        }
      ],
      "num_samples": 100
    }
  ]
}
```

## Performance Scores

We evaluate the performance of SpecBundle draft models on various benchmarks. Please visit the [Performance Dashboard](https://docs.sglang.io/SpecForge/SpecBundle/index.html) for more details.
