> ## Documentation Index
> Fetch the complete documentation index at: https://docs.sglang.io/llms.txt
> Use this file to discover all available pages before exploring further.

# Z-Image-Turbo

export const ZImageTurboDeployment = () => {
  const config = {
    modelFamily: 'Z-Image-Turbo',
    options: {
      hardware: {
        name: 'hardware',
        title: 'Hardware Platform',
        items: [{
          id: 'mi300x',
          label: 'MI300X',
          default: true
        }, {
          id: 'mi325x',
          label: 'MI325X',
          default: false
        }, {
          id: 'mi355x',
          label: 'MI355X',
          default: false
        }, {
          id: 'b200',
          label: 'B200',
          default: true
        }, {
          id: 'h200',
          label: 'H200',
          default: false
        }, {
          id: 'h100',
          label: 'H100',
          default: false
        }]
      }
    },
    generateCommand: function (values) {
      return `sglang serve \\
  --model-path Tongyi-MAI/Z-Image-Turbo \\
  --ulysses-degree=1 \\
  --ring-degree=1`;
    }
  };
  if (!config || !config.options) {
    return <div>Error: Invalid configuration provided</div>;
  }
  const getInitialState = () => {
    const initialState = {};
    Object.entries(config.options).forEach(([key, option]) => {
      if (option.type === 'checkbox') {
        initialState[key] = (option.items || []).filter(item => item.default).map(item => item.id);
        return;
      }
      if (option.type === 'text') {
        initialState[key] = option.default || '';
        return;
      }
      let items = option.items || [];
      if (option.getDynamicItems) {
        const defaultValues = {};
        Object.entries(config.options).forEach(([innerKey, innerOption]) => {
          if (innerOption.type === 'checkbox') {
            defaultValues[innerKey] = (innerOption.items || []).filter(item => item.default).map(item => item.id);
          } else if (innerOption.type === 'text') {
            defaultValues[innerKey] = innerOption.default || '';
          } else if (innerOption.items && innerOption.items.length > 0) {
            const defaultItem = innerOption.items.find(item => item.default);
            defaultValues[innerKey] = defaultItem ? defaultItem.id : innerOption.items[0].id;
          }
        });
        items = option.getDynamicItems(defaultValues);
      }
      const defaultItem = items && items.find(item => item.default);
      initialState[key] = defaultItem ? defaultItem.id : items && items[0] ? items[0].id : '';
    });
    return initialState;
  };
  const [values, setValues] = useState(getInitialState);
  const [isDark, setIsDark] = useState(false);
  useEffect(() => {
    const checkDarkMode = () => {
      const html = document.documentElement;
      const isDarkMode = html.classList.contains('dark') || html.getAttribute('data-theme') === 'dark' || html.style.colorScheme === 'dark';
      setIsDark(isDarkMode);
    };
    checkDarkMode();
    const observer = new MutationObserver(checkDarkMode);
    observer.observe(document.documentElement, {
      attributes: true,
      attributeFilter: ['class', 'data-theme', 'style']
    });
    return () => observer.disconnect();
  }, []);
  const handleRadioChange = (optionName, value) => {
    setValues(prev => ({
      ...prev,
      [optionName]: value
    }));
  };
  const handleCheckboxChange = (optionName, itemId, isChecked) => {
    setValues(prev => {
      const currentValues = prev[optionName] || [];
      if (isChecked) {
        return {
          ...prev,
          [optionName]: [...currentValues, itemId]
        };
      }
      return {
        ...prev,
        [optionName]: currentValues.filter(id => id !== itemId)
      };
    });
  };
  const handleTextChange = (optionName, value) => {
    setValues(prev => ({
      ...prev,
      [optionName]: value
    }));
  };
  const command = config.generateCommand ? config.generateCommand.call(config, values) : '';
  const containerStyle = {
    maxWidth: '900px',
    margin: '0 auto',
    display: 'flex',
    flexDirection: 'column',
    gap: '4px'
  };
  const cardStyle = {
    padding: '8px 12px',
    border: `1px solid ${isDark ? '#374151' : '#e5e7eb'}`,
    borderLeft: `3px solid ${isDark ? '#E85D4D' : '#D45D44'}`,
    borderRadius: '4px',
    display: 'flex',
    alignItems: 'center',
    gap: '12px',
    background: isDark ? '#1f2937' : '#fff'
  };
  const titleStyle = {
    fontSize: '13px',
    fontWeight: '600',
    minWidth: '140px',
    flexShrink: 0,
    color: isDark ? '#e5e7eb' : 'inherit'
  };
  const itemsStyle = {
    display: 'flex',
    rowGap: '2px',
    columnGap: '6px',
    flexWrap: 'wrap',
    alignItems: 'center',
    flex: 1
  };
  const labelBaseStyle = {
    padding: '4px 10px',
    border: `1px solid ${isDark ? '#9ca3af' : '#d1d5db'}`,
    borderRadius: '3px',
    cursor: 'pointer',
    display: 'inline-flex',
    flexDirection: 'column',
    alignItems: 'center',
    justifyContent: 'center',
    fontWeight: '500',
    fontSize: '13px',
    transition: 'all 0.2s',
    userSelect: 'none',
    minWidth: '45px',
    textAlign: 'center',
    flex: 1,
    background: isDark ? '#374151' : '#fff',
    color: isDark ? '#e5e7eb' : 'inherit'
  };
  const checkedStyle = {
    background: '#D45D44',
    color: 'white',
    borderColor: '#D45D44'
  };
  const disabledStyle = {
    cursor: 'not-allowed',
    opacity: 0.5
  };
  const subtitleStyle = {
    display: 'block',
    fontSize: '9px',
    marginTop: '1px',
    lineHeight: '1.1',
    opacity: 0.7
  };
  const textInputStyle = {
    flex: 1,
    padding: '8px 10px',
    borderRadius: '4px',
    border: `1px solid ${isDark ? '#4b5563' : '#d1d5db'}`,
    background: isDark ? '#111827' : '#fff',
    color: isDark ? '#e5e7eb' : '#111827',
    fontSize: '13px'
  };
  const commandDisplayStyle = {
    flex: 1,
    padding: '12px 16px',
    background: isDark ? '#111827' : '#f5f5f5',
    borderRadius: '6px',
    fontFamily: "'Menlo', 'Monaco', 'Courier New', monospace",
    fontSize: '12px',
    lineHeight: '1.5',
    color: isDark ? '#e5e7eb' : '#374151',
    whiteSpace: 'pre-wrap',
    overflowX: 'auto',
    margin: 0,
    border: `1px solid ${isDark ? '#374151' : '#e5e7eb'}`
  };
  return <div style={containerStyle} className="not-prose">
      {Object.entries(config.options).map(([key, option]) => {
    if (option.condition && !option.condition(values)) {
      return null;
    }
    const items = option.getDynamicItems ? option.getDynamicItems(values) : option.items || [];
    return <div key={key} style={cardStyle}>
            <div style={titleStyle}>{option.title}</div>
            <div style={itemsStyle}>
              {option.type === 'text' ? <input type="text" value={values[option.name] || ''} placeholder={option.placeholder || ''} onChange={event => handleTextChange(option.name, event.target.value)} style={textInputStyle} /> : option.type === 'checkbox' ? (option.items || []).map(item => {
      const isChecked = (values[option.name] || []).includes(item.id);
      const isDisabled = item.required || typeof item.disabledWhen === 'function' && item.disabledWhen(values);
      return <label key={item.id} title={item.disabledReason || ''} style={{
        ...labelBaseStyle,
        ...isChecked ? checkedStyle : {},
        ...isDisabled ? disabledStyle : {}
      }}>
                      <input type="checkbox" checked={isChecked} disabled={isDisabled} onChange={event => handleCheckboxChange(option.name, item.id, event.target.checked)} style={{
        display: 'none'
      }} />
                      {item.label}
                      {item.subtitle && <small style={{
        ...subtitleStyle,
        color: isChecked ? 'rgba(255,255,255,0.85)' : 'inherit'
      }}>
                          {item.subtitle}
                        </small>}
                    </label>;
    }) : items.map(item => {
      const isChecked = values[option.name] === item.id;
      const isDisabled = Boolean(item.disabled);
      return <label key={item.id} title={item.disabledReason || ''} style={{
        ...labelBaseStyle,
        ...isChecked ? checkedStyle : {},
        ...isDisabled ? disabledStyle : {}
      }}>
                      <input type="radio" name={option.name} value={item.id} checked={isChecked} disabled={isDisabled} onChange={() => !isDisabled && handleRadioChange(option.name, item.id)} style={{
        display: 'none'
      }} />
                      {item.label}
                      {item.subtitle && <small style={{
        ...subtitleStyle,
        color: isChecked ? 'rgba(255,255,255,0.85)' : 'inherit'
      }}>
                          {item.subtitle}
                        </small>}
                    </label>;
    })}
            </div>
          </div>;
  })}

      <div style={cardStyle}>
        <div style={titleStyle}>Run this Command:</div>
        <pre style={commandDisplayStyle}>{command}</pre>
      </div>
    </div>;
};

## 1. Model Introduction

[Z-Image](https://github.com/Tongyi-MAI/Z-Image) is a powerful and highly efficient image generation model family with 6B parameters, developed by Tongyi-MAI. It adopts a Scalable Single-Stream DiT (S3-DiT) architecture, where text, visual semantic tokens, and image VAE tokens are concatenated at the sequence level to serve as a unified input stream, maximizing parameter efficiency compared to dual-stream approaches.

[Z-Image-Turbo](https://huggingface.co/Tongyi-MAI/Z-Image-Turbo) is a distilled version of Z-Image that matches or exceeds leading competitors with only 8 NFEs (Number of Function Evaluations). It is powered by two core techniques: **Decoupled-DMD** (few-step distillation) and **DMDR** (fusing DMD with Reinforcement Learning).

**Key Features:**

* **Sub-second Inference Latency**: Achieves sub-second inference on enterprise-grade H800 GPUs and fits comfortably within 16GB VRAM consumer devices
* **Photorealistic Image Generation**: Excels in high-quality photorealistic image generation with rich aesthetics
* **Bilingual Text Rendering**: Supports accurate bilingual text rendering in both English and Chinese
* **Robust Instruction Adherence**: Strong prompt following and instruction adherence capabilities
* **#1 Open-Source Model**: Ranked 8th overall and #1 among open-source models on the [Artificial Analysis Text-to-Image Leaderboard](https://artificialanalysis.ai/image/leaderboard/text-to-image)

For more details, please refer to the [Z-Image-Turbo HuggingFace page](https://huggingface.co/Tongyi-MAI/Z-Image-Turbo), the [GitHub repository](https://github.com/Tongyi-MAI/Z-Image), and the [technical report (arXiv)](https://arxiv.org/abs/2511.22699).

## 2. SGLang-diffusion Installation

SGLang-diffusion offers multiple installation methods. You can choose the most suitable installation method based on your hardware platform and requirements.

Please refer to the [official SGLang-diffusion installation guide](https://github.com/sgl-project/sglang/blob/main/python/sglang/multimodal_gen/docs/install.md) for installation instructions.

## 3. Model Deployment

This section provides deployment configurations optimized for different hardware platforms and use cases.

### 3.1 Basic Configuration

Z-Image-Turbo is optimized for high-quality image generation with only 8 inference steps. The recommended launch configurations vary by hardware.

**Interactive Command Generator**: Use the configuration selector below to automatically generate the appropriate deployment command for your hardware platform.

<ZImageTurboDeployment />

### 3.2 Configuration Tips

Current supported optimization all listed [here](https://github.com/sgl-project/sglang/blob/main/python/sglang/multimodal_gen/docs/support_matrix.md).

* `--vae-path`: Path to a custom VAE model or HuggingFace model ID (e.g., fal/FLUX.2-Tiny-AutoEncoder). If not specified, the VAE will be loaded from the main model path.
* `--num-gpus`: Number of GPUs to use
* `--tp-size`: Tensor parallelism size (only for the encoder; should not be larger than 1 if text encoder offload is enabled, as layer-wise offload plus prefetch is faster)
* `--sp-degree`: Sequence parallelism size (typically should match the number of GPUs)
* `--ulysses-degree`: The degree of DeepSpeed-Ulysses-style SP in USP
* `--ring-degree`: The degree of ring attention-style SP in USP

**AMD ROCm Notes**: Requires SGLang >= v0.5.8.

## 4. API Usage

For complete API documentation, please refer to the [official API usage guide](https://github.com/sgl-project/sglang/blob/main/python/sglang/multimodal_gen/docs/openai_api.md).

### 4.1 Generate an Image

```python Example theme={null}
import base64
from openai import OpenAI

client = OpenAI(api_key="EMPTY", base_url="http://localhost:30000/v1")

response = client.images.generate(
    model="Tongyi-MAI/Z-Image-Turbo",
    prompt="A logo With Bold Large text: SGL Diffusion",
    n=1,
    response_format="b64_json",
)

# Save the generated image
image_bytes = base64.b64decode(response.data[0].b64_json)
with open("output.png", "wb") as f:
    f.write(image_bytes)
```

### 4.2 Advanced Usage

#### 4.2.1 Cache-DiT Acceleration

SGLang integrates [Cache-DiT](https://github.com/vipshop/cache-dit), a caching acceleration engine for Diffusion Transformers (DiT), to achieve up to 7.4x inference speedup with minimal quality loss. You can set `SGLANG_CACHE_DIT_ENABLED=True` to enable it. For more details, please refer to the SGLang Cache-DiT [documentation](https://github.com/sgl-project/sglang/blob/main/python/sglang/multimodal_gen/docs/cache_dit.md).

**Basic Usage**

```bash Command theme={null}
SGLANG_CACHE_DIT_ENABLED=true sglang serve --model-path Tongyi-MAI/Z-Image-Turbo
```

**Advanced Usage**

* DBCache Parameters: DBCache controls block-level caching behavior:

<table style={{width: "100%", borderCollapse: "collapse", tableLayout: "fixed"}}>
  <colgroup>
    <col style={{width: "25.0%"}} />

    <col style={{width: "25.0%"}} />

    <col style={{width: "25.0%"}} />

    <col style={{width: "25.0%"}} />
  </colgroup>

  <thead>
    <tr style={{borderBottom: "2px solid #d55816"}}>
      <th style={{textAlign: "left", padding: "10px 12px", fontWeight: 700, whiteSpace: "nowrap", backgroundColor: "rgba(255,255,255,0.02)"}}>Parameter</th>
      <th style={{textAlign: "left", padding: "10px 12px", fontWeight: 700, whiteSpace: "nowrap", backgroundColor: "rgba(255,255,255,0.05)"}}>Env Variable</th>
      <th style={{textAlign: "left", padding: "10px 12px", fontWeight: 700, whiteSpace: "nowrap", backgroundColor: "rgba(255,255,255,0.02)"}}>Default</th>
      <th style={{textAlign: "left", padding: "10px 12px", fontWeight: 700, whiteSpace: "nowrap", backgroundColor: "rgba(255,255,255,0.05)"}}>Description</th>
    </tr>
  </thead>

  <tbody>
    <tr>
      <td style={{padding: "9px 12px", fontWeight: 500, backgroundColor: "rgba(255,255,255,0.02)"}}>Fn</td>
      <td style={{padding: "9px 12px", backgroundColor: "rgba(255,255,255,0.05)"}}>`SGLANG_CACHE_DIT_FN`</td>
      <td style={{padding: "9px 12px", backgroundColor: "rgba(255,255,255,0.02)"}}>1</td>
      <td style={{padding: "9px 12px", backgroundColor: "rgba(255,255,255,0.05)"}}>Number of first blocks to always compute</td>
    </tr>

    <tr>
      <td style={{padding: "9px 12px", fontWeight: 500, backgroundColor: "rgba(255,255,255,0.02)"}}>Bn</td>
      <td style={{padding: "9px 12px", backgroundColor: "rgba(255,255,255,0.05)"}}>`SGLANG_CACHE_DIT_BN`</td>
      <td style={{padding: "9px 12px", backgroundColor: "rgba(255,255,255,0.02)"}}>0</td>
      <td style={{padding: "9px 12px", backgroundColor: "rgba(255,255,255,0.05)"}}>Number of last blocks to always compute</td>
    </tr>

    <tr>
      <td style={{padding: "9px 12px", fontWeight: 500, backgroundColor: "rgba(255,255,255,0.02)"}}>W</td>
      <td style={{padding: "9px 12px", backgroundColor: "rgba(255,255,255,0.05)"}}>`SGLANG_CACHE_DIT_WARMUP`</td>
      <td style={{padding: "9px 12px", backgroundColor: "rgba(255,255,255,0.02)"}}>4</td>
      <td style={{padding: "9px 12px", backgroundColor: "rgba(255,255,255,0.05)"}}>Warmup steps before caching starts</td>
    </tr>

    <tr>
      <td style={{padding: "9px 12px", fontWeight: 500, backgroundColor: "rgba(255,255,255,0.02)"}}>R</td>
      <td style={{padding: "9px 12px", backgroundColor: "rgba(255,255,255,0.05)"}}>`SGLANG_CACHE_DIT_RDT`</td>
      <td style={{padding: "9px 12px", backgroundColor: "rgba(255,255,255,0.02)"}}>0.24</td>
      <td style={{padding: "9px 12px", backgroundColor: "rgba(255,255,255,0.05)"}}>Residual difference threshold</td>
    </tr>

    <tr>
      <td style={{padding: "9px 12px", fontWeight: 500, backgroundColor: "rgba(255,255,255,0.02)"}}>MC</td>
      <td style={{padding: "9px 12px", backgroundColor: "rgba(255,255,255,0.05)"}}>`SGLANG_CACHE_DIT_MC`</td>
      <td style={{padding: "9px 12px", backgroundColor: "rgba(255,255,255,0.02)"}}>3</td>
      <td style={{padding: "9px 12px", backgroundColor: "rgba(255,255,255,0.05)"}}>Maximum continuous cached steps</td>
    </tr>
  </tbody>
</table>

* TaylorSeer Configuration: TaylorSeer improves caching accuracy using Taylor expansion:

<table style={{width: "100%", borderCollapse: "collapse", tableLayout: "fixed"}}>
  <colgroup>
    <col style={{width: "25.0%"}} />

    <col style={{width: "25.0%"}} />

    <col style={{width: "25.0%"}} />

    <col style={{width: "25.0%"}} />
  </colgroup>

  <thead>
    <tr style={{borderBottom: "2px solid #d55816"}}>
      <th style={{textAlign: "left", padding: "10px 12px", fontWeight: 700, whiteSpace: "nowrap", backgroundColor: "rgba(255,255,255,0.02)"}}>Parameter</th>
      <th style={{textAlign: "left", padding: "10px 12px", fontWeight: 700, whiteSpace: "nowrap", backgroundColor: "rgba(255,255,255,0.05)"}}>Env Variable</th>
      <th style={{textAlign: "left", padding: "10px 12px", fontWeight: 700, whiteSpace: "nowrap", backgroundColor: "rgba(255,255,255,0.02)"}}>Default</th>
      <th style={{textAlign: "left", padding: "10px 12px", fontWeight: 700, whiteSpace: "nowrap", backgroundColor: "rgba(255,255,255,0.05)"}}>Description</th>
    </tr>
  </thead>

  <tbody>
    <tr>
      <td style={{padding: "9px 12px", fontWeight: 500, backgroundColor: "rgba(255,255,255,0.02)"}}>Enable</td>
      <td style={{padding: "9px 12px", backgroundColor: "rgba(255,255,255,0.05)"}}>`SGLANG_CACHE_DIT_TAYLORSEER`</td>
      <td style={{padding: "9px 12px", backgroundColor: "rgba(255,255,255,0.02)"}}>false</td>
      <td style={{padding: "9px 12px", backgroundColor: "rgba(255,255,255,0.05)"}}>Enable TaylorSeer calibrator</td>
    </tr>

    <tr>
      <td style={{padding: "9px 12px", fontWeight: 500, backgroundColor: "rgba(255,255,255,0.02)"}}>Order</td>
      <td style={{padding: "9px 12px", backgroundColor: "rgba(255,255,255,0.05)"}}>`SGLANG_CACHE_DIT_TS_ORDER`</td>
      <td style={{padding: "9px 12px", backgroundColor: "rgba(255,255,255,0.02)"}}>1</td>
      <td style={{padding: "9px 12px", backgroundColor: "rgba(255,255,255,0.05)"}}>Taylor expansion order (1 or 2)</td>
    </tr>
  </tbody>
</table>

Combined Configuration Example:

```bash Command theme={null}
SGLANG_CACHE_DIT_ENABLED=true \
SGLANG_CACHE_DIT_FN=2 \
SGLANG_CACHE_DIT_BN=1 \
SGLANG_CACHE_DIT_WARMUP=4 \
SGLANG_CACHE_DIT_RDT=0.4 \
SGLANG_CACHE_DIT_MC=4 \
SGLANG_CACHE_DIT_TAYLORSEER=true \
SGLANG_CACHE_DIT_TS_ORDER=2 \
sglang serve --model-path Tongyi-MAI/Z-Image-Turbo
```

#### 4.2.2 CPU Offload

* `--dit-cpu-offload`: Use CPU offload for DiT inference. Enable if run out of memory.
* `--text-encoder-cpu-offload`: Use CPU offload for text encoder inference.
* `--vae-cpu-offload`: Use CPU offload for VAE.
* `--pin-cpu-memory`: Pin memory for CPU offload. Only added as a temp workaround if it throws "CUDA error: invalid argument".

## 5. Benchmark

Test Environment:

* Hardware: AMD Instinct MI300X GPU (1x)
* Model: Tongyi-MAI/Z-Image-Turbo
* Docker Image: lmsysorg/sglang:v0.5.8-rocm700-mi30x
* sglang diffusion version: 0.5.8

### 5.1 Speedup Benchmark

#### 5.1.1 Generate an image

**Server Command**:

```shell Command theme={null}
sglang serve --model-path Tongyi-MAI/Z-Image-Turbo \
    --ulysses-degree=1 --ring-degree=1 --port 30000
```

**Benchmark Command**:

```shell Command theme={null}
python3 -m sglang.multimodal_gen.benchmarks.bench_serving \
    --backend sglang-image --dataset vbench --task text-to-image --num-prompts 1 --max-concurrency 1
```

**Result**:

```text Output theme={null}
================= Serving Benchmark Result =================
Task:                                    text-to-image
Model:                                   Tongyi-MAI/Z-Image-Turbo
Dataset:                                 vbench
--------------------------------------------------
Benchmark duration (s):                  1.84
Request rate:                            inf
Max request concurrency:                 1
Successful requests:                     1/1
--------------------------------------------------
Request throughput (req/s):              0.54
Latency Mean (s):                        1.8435
Latency Median (s):                      1.8435
Latency P99 (s):                         1.8435
--------------------------------------------------
Peak Memory Max (MB):                    30689.20
Peak Memory Mean (MB):                   30689.20
Peak Memory Median (MB):                 30689.20
============================================================
```

#### 5.1.2 Generate images with high concurrency

**Benchmark Command**:

```shell Command theme={null}
python3 -m sglang.multimodal_gen.benchmarks.bench_serving \
    --backend sglang-image --dataset vbench --task text-to-image --num-prompts 20 --max-concurrency 20
```

**Result**:

```text Output theme={null}
================= Serving Benchmark Result =================
Task:                                    text-to-image
Model:                                   Tongyi-MAI/Z-Image-Turbo
Dataset:                                 vbench
--------------------------------------------------
Benchmark duration (s):                  35.32
Request rate:                            inf
Max request concurrency:                 20
Successful requests:                     20/20
--------------------------------------------------
Request throughput (req/s):              0.57
Latency Mean (s):                        18.5672
Latency Median (s):                      18.5573
Latency P99 (s):                         34.9880
--------------------------------------------------
Peak Memory Max (MB):                    30689.26
Peak Memory Mean (MB):                   30689.21
Peak Memory Median (MB):                 30689.21
============================================================
```
