> ## Documentation Index
> Fetch the complete documentation index at: https://docs.sglang.io/llms.txt
> Use this file to discover all available pages before exploring further.

# Ministral-3

export const Ministral3Deployment = () => {
  const options = {
    hardware: {
      name: 'hardware',
      title: 'Hardware Platform',
      items: [{
        id: 'mi300x',
        label: 'MI300x',
        default: true
      }, {
        id: 'mi325x',
        label: 'MI325x',
        default: false
      }, {
        id: 'mi355x',
        label: 'MI355x',
        default: false
      }]
    },
    model: {
      name: 'model',
      title: 'Model',
      items: [{
        id: 'small',
        label: 'Ministral-3-8B-Instruct-2512',
        default: true
      }, {
        id: 'large',
        label: 'Ministral-3-14B-Instruct-2512',
        default: false
      }]
    },
    toolcall: {
      name: 'toolcall',
      title: 'Tool Call Parser',
      items: [{
        id: 'enabled',
        label: 'enabled',
        default: true
      }, {
        id: 'disabled',
        label: 'disabled',
        default: false
      }],
      commandRule: value => value === 'enabled' ? '--tool-call-parser mistral' : null
    }
  };
  const modelConfigs = {
    small: {
      modelId: 'mistralai/Ministral-3-8B-Instruct-2512',
      tpByHardware: {
        mi300x: 1,
        mi325x: 1,
        mi355x: 1
      }
    },
    large: {
      modelId: 'mistralai/Ministral-3-14B-Instruct-2512',
      tpByHardware: {
        mi300x: 1,
        mi325x: 1,
        mi355x: 1
      }
    }
  };
  const generateCommand = values => {
    const {hardware, model} = values;
    const modelCfg = modelConfigs[model];
    if (!modelCfg) return `# Error: Unknown model selection: ${model}`;
    const tp = modelCfg.tpByHardware[hardware];
    if (!tp) return `# Error: Unknown hardware platform: ${hardware}`;
    let cmd = 'sglang serve \\\n';
    cmd += `  --model-path ${modelCfg.modelId}`;
    if (tp > 1) {
      cmd += ` \\\n  --tp ${tp}`;
    }
    cmd += ` \\\n  --trust-remote-code`;
    for (const [key, option] of Object.entries(options)) {
      if (option.commandRule) {
        const rule = option.commandRule(values[key]);
        if (rule) cmd += ` \\\n  ${rule}`;
      }
    }
    return cmd;
  };
  const getInitialState = () => {
    const initialState = {};
    Object.entries(options).forEach(([key, option]) => {
      if (option.type === 'checkbox') {
        initialState[key] = (option.items || []).filter(item => item.default).map(item => item.id);
        return;
      }
      if (option.type === 'text') {
        initialState[key] = option.default || '';
        return;
      }
      let items = option.items || [];
      if (option.getDynamicItems) {
        const defaultValues = {};
        Object.entries(options).forEach(([innerKey, innerOption]) => {
          if (innerOption.type === 'checkbox') {
            defaultValues[innerKey] = (innerOption.items || []).filter(item => item.default).map(item => item.id);
          } else if (innerOption.type === 'text') {
            defaultValues[innerKey] = innerOption.default || '';
          } else if (innerOption.items && innerOption.items.length > 0) {
            const defaultItem = innerOption.items.find(item => item.default);
            defaultValues[innerKey] = defaultItem ? defaultItem.id : innerOption.items[0].id;
          }
        });
        items = option.getDynamicItems(defaultValues);
      }
      const defaultItem = items && items.find(item => item.default);
      initialState[key] = defaultItem ? defaultItem.id : items && items[0] ? items[0].id : '';
    });
    return initialState;
  };
  const [values, setValues] = useState(getInitialState);
  const [isDark, setIsDark] = useState(false);
  useEffect(() => {
    const checkDarkMode = () => {
      const html = document.documentElement;
      const isDarkMode = html.classList.contains('dark') || html.getAttribute('data-theme') === 'dark' || html.style.colorScheme === 'dark';
      setIsDark(isDarkMode);
    };
    checkDarkMode();
    const observer = new MutationObserver(checkDarkMode);
    observer.observe(document.documentElement, {
      attributes: true,
      attributeFilter: ['class', 'data-theme', 'style']
    });
    return () => observer.disconnect();
  }, []);
  const handleRadioChange = (optionName, value) => {
    setValues(prev => ({
      ...prev,
      [optionName]: value
    }));
  };
  const handleCheckboxChange = (optionName, itemId, isChecked) => {
    setValues(prev => {
      const currentValues = prev[optionName] || [];
      if (isChecked) {
        return {
          ...prev,
          [optionName]: [...currentValues, itemId]
        };
      }
      return {
        ...prev,
        [optionName]: currentValues.filter(id => id !== itemId)
      };
    });
  };
  const handleTextChange = (optionName, value) => {
    setValues(prev => ({
      ...prev,
      [optionName]: value
    }));
  };
  const command = generateCommand(values);
  const containerStyle = {
    maxWidth: '900px',
    margin: '0 auto',
    display: 'flex',
    flexDirection: 'column',
    gap: '4px'
  };
  const cardStyle = {
    padding: '8px 12px',
    border: `1px solid ${isDark ? '#374151' : '#e5e7eb'}`,
    borderLeft: `3px solid ${isDark ? '#E85D4D' : '#D45D44'}`,
    borderRadius: '4px',
    display: 'flex',
    alignItems: 'center',
    gap: '12px',
    background: isDark ? '#1f2937' : '#fff'
  };
  const titleStyle = {
    fontSize: '13px',
    fontWeight: '600',
    minWidth: '140px',
    flexShrink: 0,
    color: isDark ? '#e5e7eb' : 'inherit'
  };
  const itemsStyle = {
    display: 'flex',
    rowGap: '2px',
    columnGap: '6px',
    flexWrap: 'wrap',
    alignItems: 'center',
    flex: 1
  };
  const labelBaseStyle = {
    padding: '4px 10px',
    border: `1px solid ${isDark ? '#9ca3af' : '#d1d5db'}`,
    borderRadius: '3px',
    cursor: 'pointer',
    display: 'inline-flex',
    flexDirection: 'column',
    alignItems: 'center',
    justifyContent: 'center',
    fontWeight: '500',
    fontSize: '13px',
    transition: 'all 0.2s',
    userSelect: 'none',
    minWidth: '45px',
    textAlign: 'center',
    flex: 1,
    background: isDark ? '#374151' : '#fff',
    color: isDark ? '#e5e7eb' : 'inherit'
  };
  const checkedStyle = {
    background: '#D45D44',
    color: 'white',
    borderColor: '#D45D44'
  };
  const disabledStyle = {
    cursor: 'not-allowed',
    opacity: 0.5
  };
  const subtitleStyle = {
    display: 'block',
    fontSize: '9px',
    marginTop: '1px',
    lineHeight: '1.1',
    opacity: 0.7
  };
  const textInputStyle = {
    flex: 1,
    padding: '8px 10px',
    borderRadius: '4px',
    border: `1px solid ${isDark ? '#4b5563' : '#d1d5db'}`,
    background: isDark ? '#111827' : '#fff',
    color: isDark ? '#e5e7eb' : '#111827',
    fontSize: '13px'
  };
  const commandDisplayStyle = {
    flex: 1,
    padding: '12px 16px',
    background: isDark ? '#111827' : '#f5f5f5',
    borderRadius: '6px',
    fontFamily: "'Menlo', 'Monaco', 'Courier New', monospace",
    fontSize: '12px',
    lineHeight: '1.5',
    color: isDark ? '#e5e7eb' : '#374151',
    whiteSpace: 'pre-wrap',
    overflowX: 'auto',
    margin: 0,
    border: `1px solid ${isDark ? '#374151' : '#e5e7eb'}`
  };
  return <div style={containerStyle} className="not-prose">
      {Object.entries(options).map(([key, option]) => {
    if (option.condition && !option.condition(values)) {
      return null;
    }
    const items = option.getDynamicItems ? option.getDynamicItems(values) : option.items || [];
    return <div key={key} style={cardStyle}>
            <div style={titleStyle}>{option.title}</div>
            <div style={itemsStyle}>
              {option.type === 'text' ? <input type="text" value={values[option.name] || ''} placeholder={option.placeholder || ''} onChange={event => handleTextChange(option.name, event.target.value)} style={textInputStyle} /> : option.type === 'checkbox' ? (option.items || []).map(item => {
      const isChecked = (values[option.name] || []).includes(item.id);
      const isDisabled = item.required || typeof item.disabledWhen === 'function' && item.disabledWhen(values);
      return <label key={item.id} title={item.disabledReason || ''} style={{
        ...labelBaseStyle,
        ...isChecked ? checkedStyle : {},
        ...isDisabled ? disabledStyle : {}
      }}>
                      <input type="checkbox" checked={isChecked} disabled={isDisabled} onChange={event => handleCheckboxChange(option.name, item.id, event.target.checked)} style={{
        display: 'none'
      }} />
                      {item.label}
                      {item.subtitle && <small style={{
        ...subtitleStyle,
        color: isChecked ? 'rgba(255,255,255,0.85)' : 'inherit'
      }}>
                          {item.subtitle}
                        </small>}
                    </label>;
    }) : items.map(item => {
      const isChecked = values[option.name] === item.id;
      const isDisabled = Boolean(item.disabled);
      return <label key={item.id} title={item.disabledReason || ''} style={{
        ...labelBaseStyle,
        ...isChecked ? checkedStyle : {},
        ...isDisabled ? disabledStyle : {}
      }}>
                      <input type="radio" name={option.name} value={item.id} checked={isChecked} disabled={isDisabled} onChange={() => !isDisabled && handleRadioChange(option.name, item.id)} style={{
        display: 'none'
      }} />
                      {item.label}
                      {item.subtitle && <small style={{
        ...subtitleStyle,
        color: isChecked ? 'rgba(255,255,255,0.85)' : 'inherit'
      }}>
                          {item.subtitle}
                        </small>}
                    </label>;
    })}
            </div>
          </div>;
  })}
      <div style={cardStyle}>
        <div style={titleStyle}>Run this Command:</div>
        <pre style={commandDisplayStyle}>{command}</pre>
      </div>
    </div>;
};

## 1. Model Introduction

The largest model in the Ministral 3 family, Ministral 3 14B offers frontier capabilities and performance comparable to its larger Mistral Small 3.2 24B counterpart. A powerful and efficient language model with vision capabilities.

The Ministral 3 14B Instruct model offers the following capabilities:

* **Vision**: Enables the model to analyze images and provide insights based on visual content, in addition to text.
* **Multilingual**: Supports dozens of languages, including English, French, Spanish, German, Italian, Portuguese, Dutch, Chinese, Japanese, Korean, and Arabic.
* **System Prompt**: Maintains strong adherence and support for system prompts.
* **Agentic**: Offers best-in-class agentic capabilities with native function calling and JSON output.
* **Edge-Optimized**: Delivers best-in-class performance at a small scale, deployable anywhere.
* **Apache 2.0 License**: Open-source license allowing usage and modification for both commercial and non-commercial purposes.
* **Large Context Window**: Supports a 256k context window.

For further details, please refer to the [official documentation](https://github.com/mistralai).

## 2. SGLang Installation

Please refer to the [official SGLang installation guide](../../../docs/get-started/install) for installation instructions.

## 3. Model Deployment

This section provides deployment configurations optimized for different hardware platforms and use cases.

### 3.1 Basic Configuration

**Interactive Command Generator**: Use the configuration selector below to automatically generate the appropriate deployment command for your hardware platform, model variant, and tool call parser setting.

<Ministral3Deployment />

### 3.2 Configuration Tips

**Context length vs memory**: Ministral-3 advertises a long context window; if you are memory-constrained, start by lowering `--context-length` (for example, to `32768`) and increase it once things are stable.

**Pre-installation steps**: Run the following commands inside the Docker container after launching it:

```shell Command theme={null}
pip install mistral-common --upgrade
pip install transformers==5.0.0.rc0
```

## 4. Model Invocation

### 4.1 Basic Usage

For basic API usage and request examples, please refer to:

* [SGLang Basic Usage Guide](../../../docs/basic_usage/send_request)
* [SGLang OpenAI Vision API Guide](../../../docs/basic_usage/openai_api_vision)

### 4.2 Advanced Usage

#### 4.2.1 Launch the docker

```shell Command theme={null}
docker pull lmsysorg/sglang:v0.5.9-rocm720-mi30x
```

```shell Command theme={null}
docker run -d -it --ipc=host --network=host --privileged \
  --cap-add=CAP_SYS_ADMIN \
  --device=/dev/kfd --device=/dev/dri --device=/dev/mem \
  --group-add video --cap-add=SYS_PTRACE \
  --security-opt seccomp=unconfined \
  -v /:/work \
  -e SHELL=/bin/bash \
  --name Ministral \
 lmsysorg/sglang:v0.5.9-rocm720-mi30x \
  /bin/bash
```

#### 4.2.2 Launch the server

```shell Command theme={null}
sglang serve \
  --model-path mistralai/Ministral-3-14B-Instruct-2512 \
  --tp 1 \
  --trust-remote-code
```

## 5. Benchmark

This section uses **industry-standard configurations** for comparable benchmark results.

### 5.1 Speed Benchmark

**Test Environment:**

* Hardware: MI300X GPU (8x)

* Model: mistralai/Ministral-3-14B-Instruct-2512

* Tensor Parallelism: 1

* SGLang Version: 0.5.7

* Model Deployment Command:

```bash Command theme={null}
sglang serve \
  --model-path mistralai/Ministral-3-14B-Instruct-2512 \
  --tp 1 \
  --trust-remote-code
```

##### Low Concurrency

* Benchmark Command:

```bash Command theme={null}
python3 -m sglang.bench_serving \
  --backend sglang \
  --model mistralai/Ministral-3-14B-Instruct-2512 \
  --dataset-name random \
  --random-input-len 1000 \
  --random-output-len 1000 \
  --num-prompts 10 \
  --max-concurrency 1 \
  --request-rate inf
```

* Test Results:

```text Output theme={null}
============ Serving Benchmark Result ============
Backend:                                 sglang
Traffic request rate:                    inf
Max request concurrency:                 1
Successful requests:                     10
Benchmark duration (s):                  65.08
Total input tokens:                      6101
Total input text tokens:                 6101
Total input vision tokens:               0
Total generated tokens:                  4220
Total generated tokens (retokenized):    4218
Request throughput (req/s):              0.15
Input token throughput (tok/s):          93.75
Output token throughput (tok/s):         64.84
Peak output token throughput (tok/s):    151.00
Peak concurrent requests:                2
Total token throughput (tok/s):          158.59
Concurrency:                             1.00
----------------End-to-End Latency----------------
Mean E2E Latency (ms):                   6505.51
Median E2E Latency (ms):                 3037.37
---------------Time to First Token----------------
Mean TTFT (ms):                          3709.33
Median TTFT (ms):                        53.72
P99 TTFT (ms):                           33320.77
-----Time per Output Token (excl. 1st token)------
Mean TPOT (ms):                          6.63
Median TPOT (ms):                        6.64
P99 TPOT (ms):                           6.66
---------------Inter-Token Latency----------------
Mean ITL (ms):                           6.64
Median ITL (ms):                         6.65
P95 ITL (ms):                            6.75
P99 ITL (ms):                            6.82
Max ITL (ms):                            8.45
==================================================
```

##### Medium Concurrency

* Benchmark Command:

```bash Command theme={null}
python3 -m sglang.bench_serving \
  --backend sglang \
  --model mistralai/Ministral-3-14B-Instruct-2512 \
  --dataset-name random \
  --random-input-len 1000 \
  --random-output-len 1000 \
  --num-prompts 80 \
  --max-concurrency 16 \
  --request-rate inf
```

* Test Results:

```text Output theme={null}
============ Serving Benchmark Result ============
Backend:                                 sglang
Traffic request rate:                    inf
Max request concurrency:                 16
Successful requests:                     80
Benchmark duration (s):                  31.20
Total input tokens:                      39668
Total input text tokens:                 39668
Total input vision tokens:               0
Total generated tokens:                  40805
Total generated tokens (retokenized):    40783
Request throughput (req/s):              2.56
Input token throughput (tok/s):          1271.38
Output token throughput (tok/s):         1307.82
Peak output token throughput (tok/s):    1760.00
Peak concurrent requests:                22
Total token throughput (tok/s):          2579.20
Concurrency:                             13.72
----------------End-to-End Latency----------------
Mean E2E Latency (ms):                   5351.07
Median E2E Latency (ms):                 5626.45
---------------Time to First Token----------------
Mean TTFT (ms):                          280.87
Median TTFT (ms):                        68.16
P99 TTFT (ms):                           1194.79
-----Time per Output Token (excl. 1st token)------
Mean TPOT (ms):                          10.47
Median TPOT (ms):                        10.10
P99 TPOT (ms):                           20.00
---------------Inter-Token Latency----------------
Mean ITL (ms):                           9.96
Median ITL (ms):                         9.10
P95 ITL (ms):                            9.87
P99 ITL (ms):                            51.39
Max ITL (ms):                            888.63
==================================================
```

##### High Concurrency

* Benchmark Command:

```bash Command theme={null}
python3 -m sglang.bench_serving \
  --backend sglang \
  --model mistralai/Ministral-3-14B-Instruct-2512 \
  --dataset-name random \
  --random-input-len 1000 \
  --random-output-len 1000 \
  --num-prompts 500 \
  --max-concurrency 100 \
  --request-rate inf
```

* Test Results:

```text Output theme={null}
============ Serving Benchmark Result ============
Backend:                                 sglang
Traffic request rate:                    inf
Max request concurrency:                 100
Successful requests:                     500
Benchmark duration (s):                  88.75
Total input tokens:                      249831
Total input text tokens:                 249831
Total input vision tokens:               0
Total generated tokens:                  252662
Total generated tokens (retokenized):    252547
Request throughput (req/s):              5.63
Input token throughput (tok/s):          2815.01
Output token throughput (tok/s):         2846.91
Peak output token throughput (tok/s):    4271.00
Peak concurrent requests:                110
Total token throughput (tok/s):          5661.93
Concurrency:                             93.04
----------------End-to-End Latency----------------
Mean E2E Latency (ms):                   16514.45
Median E2E Latency (ms):                 15834.45
---------------Time to First Token----------------
Mean TTFT (ms):                          148.57
Median TTFT (ms):                        99.15
P99 TTFT (ms):                           455.86
-----Time per Output Token (excl. 1st token)------
Mean TPOT (ms):                          32.93
Median TPOT (ms):                        34.73
P99 TPOT (ms):                           38.05
---------------Inter-Token Latency----------------
Mean ITL (ms):                           32.45
Median ITL (ms):                         27.30
P95 ITL (ms):                            71.73
P99 ITL (ms):                            73.45
Max ITL (ms):                            328.10
==================================================
```

### 5.2 Accuracy Benchmark

Model accuracy on standard benchmarks:

#### 5.2.1 GSM8K Benchmark

* Benchmark Command:

```bash Command theme={null}
python3 benchmark/gsm8k/bench_sglang.py \
  --num-shots 8 \
  --num-questions 1316 \
  --parallel 1316
```

**Test Results:**

```text Output theme={null}
Accuracy: 0.959
Invalid: 0.000
Latency: 29.185 s
Output throughput: 4854.672 token/s
```