> ## Documentation Index > Fetch the complete documentation index at: https://docs.sglang.io/llms.txt > Use this file to discover all available pages before exploring further. # Z-Image-Turbo export const ZImageTurboDeployment = () => { const config = { modelFamily: 'Z-Image-Turbo', options: { hardware: { name: 'hardware', title: 'Hardware Platform', items: [{ id: 'mi300x', label: 'MI300X', default: true }, { id: 'mi325x', label: 'MI325X', default: false }, { id: 'mi355x', label: 'MI355X', default: false }, { id: 'b200', label: 'B200', default: true }, { id: 'h200', label: 'H200', default: false }, { id: 'h100', label: 'H100', default: false }] } }, generateCommand: function (values) { return `sglang serve \\ --model-path Tongyi-MAI/Z-Image-Turbo \\ --ulysses-degree=1 \\ --ring-degree=1`; } }; if (!config || !config.options) { return

Error: Invalid configuration provided

; } const getInitialState = () => { const initialState = {}; Object.entries(config.options).forEach(([key, option]) => { if (option.type === 'checkbox') { initialState[key] = (option.items || []).filter(item => item.default).map(item => item.id); return; } if (option.type === 'text') { initialState[key] = option.default || ''; return; } let items = option.items || []; if (option.getDynamicItems) { const defaultValues = {}; Object.entries(config.options).forEach(([innerKey, innerOption]) => { if (innerOption.type === 'checkbox') { defaultValues[innerKey] = (innerOption.items || []).filter(item => item.default).map(item => item.id); } else if (innerOption.type === 'text') { defaultValues[innerKey] = innerOption.default || ''; } else if (innerOption.items && innerOption.items.length > 0) { const defaultItem = innerOption.items.find(item => item.default); defaultValues[innerKey] = defaultItem ? defaultItem.id : innerOption.items[0].id; } }); items = option.getDynamicItems(defaultValues); } const defaultItem = items && items.find(item => item.default); initialState[key] = defaultItem ? defaultItem.id : items && items[0] ? 
items[0].id : ''; }); return initialState; }; const [values, setValues] = useState(getInitialState); const [isDark, setIsDark] = useState(false); useEffect(() => { const checkDarkMode = () => { const html = document.documentElement; const isDarkMode = html.classList.contains('dark') || html.getAttribute('data-theme') === 'dark' || html.style.colorScheme === 'dark'; setIsDark(isDarkMode); }; checkDarkMode(); const observer = new MutationObserver(checkDarkMode); observer.observe(document.documentElement, { attributes: true, attributeFilter: ['class', 'data-theme', 'style'] }); return () => observer.disconnect(); }, []); const handleRadioChange = (optionName, value) => { setValues(prev => ({ ...prev, [optionName]: value })); }; const handleCheckboxChange = (optionName, itemId, isChecked) => { setValues(prev => { const currentValues = prev[optionName] || []; if (isChecked) { return { ...prev, [optionName]: [...currentValues, itemId] }; } return { ...prev, [optionName]: currentValues.filter(id => id !== itemId) }; }); }; const handleTextChange = (optionName, value) => { setValues(prev => ({ ...prev, [optionName]: value })); }; const command = config.generateCommand ? config.generateCommand.call(config, values) : ''; const containerStyle = { maxWidth: '900px', margin: '0 auto', display: 'flex', flexDirection: 'column', gap: '4px' }; const cardStyle = { padding: '8px 12px', border: `1px solid ${isDark ? '#374151' : '#e5e7eb'}`, borderLeft: `3px solid ${isDark ? '#E85D4D' : '#D45D44'}`, borderRadius: '4px', display: 'flex', alignItems: 'center', gap: '12px', background: isDark ? '#1f2937' : '#fff' }; const titleStyle = { fontSize: '13px', fontWeight: '600', minWidth: '140px', flexShrink: 0, color: isDark ? '#e5e7eb' : 'inherit' }; const itemsStyle = { display: 'flex', rowGap: '2px', columnGap: '6px', flexWrap: 'wrap', alignItems: 'center', flex: 1 }; const labelBaseStyle = { padding: '4px 10px', border: `1px solid ${isDark ? 
'#9ca3af' : '#d1d5db'}`, borderRadius: '3px', cursor: 'pointer', display: 'inline-flex', flexDirection: 'column', alignItems: 'center', justifyContent: 'center', fontWeight: '500', fontSize: '13px', transition: 'all 0.2s', userSelect: 'none', minWidth: '45px', textAlign: 'center', flex: 1, background: isDark ? '#374151' : '#fff', color: isDark ? '#e5e7eb' : 'inherit' }; const checkedStyle = { background: '#D45D44', color: 'white', borderColor: '#D45D44' }; const disabledStyle = { cursor: 'not-allowed', opacity: 0.5 }; const subtitleStyle = { display: 'block', fontSize: '9px', marginTop: '1px', lineHeight: '1.1', opacity: 0.7 }; const textInputStyle = { flex: 1, padding: '8px 10px', borderRadius: '4px', border: `1px solid ${isDark ? '#4b5563' : '#d1d5db'}`, background: isDark ? '#111827' : '#fff', color: isDark ? '#e5e7eb' : '#111827', fontSize: '13px' }; const commandDisplayStyle = { flex: 1, padding: '12px 16px', background: isDark ? '#111827' : '#f5f5f5', borderRadius: '6px', fontFamily: "'Menlo', 'Monaco', 'Courier New', monospace", fontSize: '12px', lineHeight: '1.5', color: isDark ? '#e5e7eb' : '#374151', whiteSpace: 'pre-wrap', overflowX: 'auto', margin: 0, border: `1px solid ${isDark ? '#374151' : '#e5e7eb'}` }; return

{Object.entries(config.options).map(([key, option]) => { if (option.condition && !option.condition(values)) { return null; } const items = option.getDynamicItems ? option.getDynamicItems(values) : option.items || []; return

{option.title}

{option.type === 'text' ? handleTextChange(option.name, event.target.value)} style={textInputStyle} /> : option.type === 'checkbox' ? (option.items || []).map(item => { const isChecked = (values[option.name] || []).includes(item.id); const isDisabled = item.required || typeof item.disabledWhen === 'function' && item.disabledWhen(values); return handleCheckboxChange(option.name, item.id, event.target.checked)} style={{ display: 'none' }} /> {item.label} {item.subtitle && {item.subtitle} } ; }) : items.map(item => { const isChecked = values[option.name] === item.id; const isDisabled = Boolean(item.disabled); return !isDisabled && handleRadioChange(option.name, item.id)} style={{ display: 'none' }} /> {item.label} {item.subtitle && {item.subtitle} } ; })}

; })}

Run this Command:

{command}

; }; ## 1. Model Introduction [Z-Image](https://github.com/Tongyi-MAI/Z-Image) is a powerful and highly efficient image generation model family with 6B parameters, developed by Tongyi-MAI. It adopts a Scalable Single-Stream DiT (S3-DiT) architecture, where text, visual semantic tokens, and image VAE tokens are concatenated at the sequence level to serve as a unified input stream, maximizing parameter efficiency compared to dual-stream approaches. [Z-Image-Turbo](https://huggingface.co/Tongyi-MAI/Z-Image-Turbo) is a distilled version of Z-Image that matches or exceeds leading competitors with only 8 NFEs (Number of Function Evaluations). It is powered by two core techniques: **Decoupled-DMD** (few-step distillation) and **DMDR** (fusing DMD with Reinforcement Learning). **Key Features:** * **Sub-second Inference Latency**: Achieves sub-second inference on enterprise-grade H800 GPUs and fits comfortably within 16GB VRAM consumer devices * **Photorealistic Image Generation**: Excels in high-quality photorealistic image generation with rich aesthetics * **Bilingual Text Rendering**: Supports accurate bilingual text rendering in both English and Chinese * **Robust Instruction Adherence**: Strong prompt following and instruction adherence capabilities * **#1 Open-Source Model**: Ranked 8th overall and #1 among open-source models on the [Artificial Analysis Text-to-Image Leaderboard](https://artificialanalysis.ai/image/leaderboard/text-to-image) For more details, please refer to the [Z-Image-Turbo HuggingFace page](https://huggingface.co/Tongyi-MAI/Z-Image-Turbo), the [GitHub repository](https://github.com/Tongyi-MAI/Z-Image), and the [technical report (arXiv)](https://arxiv.org/abs/2511.22699). ## 2. SGLang-diffusion Installation SGLang-diffusion offers multiple installation methods. You can choose the most suitable installation method based on your hardware platform and requirements. 
Please refer to the [official SGLang-diffusion installation guide](https://docs.sglang.io/docs/sglang-diffusion/installation) for installation instructions. ## 3. Model Deployment This section provides deployment configurations optimized for different hardware platforms and use cases. ### 3.1 Basic Configuration Z-Image-Turbo is optimized for high-quality image generation with only 8 inference steps. The recommended launch configurations vary by hardware. **Interactive Command Generator**: Use the configuration selector below to automatically generate the appropriate deployment command for your hardware platform. ### 3.2 Configuration Tips Currently supported optimizations are listed [here](/docs/sglang-diffusion/compatibility_matrix). * `--vae-path`: Path to a custom VAE model or HuggingFace model ID (e.g., fal/FLUX.2-Tiny-AutoEncoder). If not specified, the VAE will be loaded from the main model path. * `--num-gpus`: Number of GPUs to use * `--tp-size`: Tensor parallelism size (only for the encoder; should not be larger than 1 if text encoder offload is enabled, as layer-wise offload plus prefetch is faster) * `--sp-degree`: Sequence parallelism size (typically should match the number of GPUs) * `--ulysses-degree`: The degree of DeepSpeed-Ulysses-style SP in USP * `--ring-degree`: The degree of ring attention-style SP in USP **AMD ROCm Notes**: Requires SGLang >= v0.5.8. ## 4. API Usage For complete API documentation, please refer to the [official API usage guide](/docs/sglang-diffusion/api/openai_api). 
### 4.1 Generate an Image ```python Example theme={null} import base64 from openai import OpenAI client = OpenAI(api_key="EMPTY", base_url="http://localhost:30000/v1") response = client.images.generate( model="Tongyi-MAI/Z-Image-Turbo", prompt="A logo With Bold Large text: SGL Diffusion", n=1, response_format="b64_json", ) # Save the generated image image_bytes = base64.b64decode(response.data[0].b64_json) with open("output.png", "wb") as f: f.write(image_bytes) ``` ### 4.2 Advanced Usage #### 4.2.1 Cache-DiT Acceleration SGLang integrates [Cache-DiT](https://github.com/vipshop/cache-dit), a caching acceleration engine for Diffusion Transformers (DiT), to achieve up to 7.4x inference speedup with minimal quality loss. You can set `SGLANG_CACHE_DIT_ENABLED=True` to enable it. For more details, please refer to the SGLang Cache-DiT [documentation](/docs/sglang-diffusion/cache_dit). **Basic Usage** ```bash Command theme={null} SGLANG_CACHE_DIT_ENABLED=true sglang serve --model-path Tongyi-MAI/Z-Image-Turbo ``` **Advanced Usage** * DBCache Parameters: DBCache controls block-level caching behavior:

| Parameter | Env Variable | Default | Description |
| --- | --- | --- | --- |
| Fn | `SGLANG_CACHE_DIT_FN` | 1 | Number of first blocks to always compute |
| Bn | `SGLANG_CACHE_DIT_BN` | 0 | Number of last blocks to always compute |
| W | `SGLANG_CACHE_DIT_WARMUP` | 4 | Warmup steps before caching starts |
| R | `SGLANG_CACHE_DIT_RDT` | 0.24 | Residual difference threshold |
| MC | `SGLANG_CACHE_DIT_MC` | 3 | Maximum continuous cached steps |

* TaylorSeer Configuration: TaylorSeer improves caching accuracy using Taylor expansion:

| Parameter | Env Variable | Default | Description |
| --- | --- | --- | --- |
| Enable | `SGLANG_CACHE_DIT_TAYLORSEER` | false | Enable TaylorSeer calibrator |
| Order | `SGLANG_CACHE_DIT_TS_ORDER` | 1 | Taylor expansion order (1 or 2) |

Combined Configuration Example: ```bash Command theme={null} SGLANG_CACHE_DIT_ENABLED=true \ SGLANG_CACHE_DIT_FN=2 \ SGLANG_CACHE_DIT_BN=1 \ SGLANG_CACHE_DIT_WARMUP=4 \ SGLANG_CACHE_DIT_RDT=0.4 \ SGLANG_CACHE_DIT_MC=4 \ SGLANG_CACHE_DIT_TAYLORSEER=true \ SGLANG_CACHE_DIT_TS_ORDER=2 \ sglang serve --model-path Tongyi-MAI/Z-Image-Turbo ``` #### 4.2.2 CPU Offload * `--dit-cpu-offload`: Use CPU offload for DiT inference. Enable this if you run out of memory. * `--text-encoder-cpu-offload`: Use CPU offload for text encoder inference. * `--vae-cpu-offload`: Use CPU offload for VAE. * `--pin-cpu-memory`: Pin memory for CPU offload. Use this only as a temporary workaround if you encounter "CUDA error: invalid argument". ## 5. Benchmark Test Environment: * Hardware: AMD Instinct MI300X GPU (1x) * Model: Tongyi-MAI/Z-Image-Turbo * Docker Image: lmsysorg/sglang:v0.5.8-rocm700-mi30x * sglang diffusion version: 0.5.8 ### 5.1 Speedup Benchmark #### 5.1.1 Generate an image **Server Command**: ```shell Command theme={null} sglang serve --model-path Tongyi-MAI/Z-Image-Turbo \ --ulysses-degree=1 --ring-degree=1 --port 30000 ``` **Benchmark Command**: ```shell Command theme={null} python3 -m sglang.multimodal_gen.benchmarks.bench_serving \ --backend sglang-image --dataset vbench --task text-to-image --num-prompts 1 --max-concurrency 1 ``` **Result**: ```text Output theme={null} ================= Serving Benchmark Result ================= Task: text-to-image Model: Tongyi-MAI/Z-Image-Turbo Dataset: vbench -------------------------------------------------- Benchmark duration (s): 1.84 Request rate: inf Max request concurrency: 1 Successful requests: 1/1 -------------------------------------------------- Request throughput (req/s): 0.54 Latency Mean (s): 1.8435 Latency Median (s): 1.8435 Latency P99 (s): 1.8435 -------------------------------------------------- Peak Memory Max (MB): 30689.20 Peak Memory Mean (MB): 30689.20 Peak Memory Median (MB): 30689.20 
============================================================ ``` #### 5.1.2 Generate images with high concurrency **Benchmark Command**: ```shell Command theme={null} python3 -m sglang.multimodal_gen.benchmarks.bench_serving \ --backend sglang-image --dataset vbench --task text-to-image --num-prompts 20 --max-concurrency 20 ``` **Result**: ```text Output theme={null} ================= Serving Benchmark Result ================= Task: text-to-image Model: Tongyi-MAI/Z-Image-Turbo Dataset: vbench -------------------------------------------------- Benchmark duration (s): 35.32 Request rate: inf Max request concurrency: 20 Successful requests: 20/20 -------------------------------------------------- Request throughput (req/s): 0.57 Latency Mean (s): 18.5672 Latency Median (s): 18.5573 Latency P99 (s): 34.9880 -------------------------------------------------- Peak Memory Max (MB): 30689.26 Peak Memory Mean (MB): 30689.21 Peak Memory Median (MB): 30689.21 ============================================================ ```