> ## Documentation Index
> Fetch the complete documentation index at: https://docs.sglang.io/llms.txt
> Use this file to discover all available pages before exploring further.

# Wan2.1

export const Wan21Deployment = () => {
  // Model variants offered by the selector. `validTasks` lists the task ids a
  // variant may be combined with; it is stripped off before the entries are
  // used as radio items.
  const MODELSIZE_DEFS = [
    {
      id: '14b',
      label: '14B',
      subtitle: 'High-quality, 480P/720P',
      default: true,
      validTasks: ['t2v', 'i2v']
    },
    {
      id: '1_3b',
      label: '1.3B',
      subtitle: 'Lightweight, 480P',
      default: false,
      validTasks: ['t2v']
    }
  ];
  // Per-model settings, keyed by `${task}-${modelsize}`.
  //   repoId         - value emitted after `--model-path` in the generated command
  //   supportedLoras - LoRA choices offered for that model (may be empty)
  const modelConfigs = {
    't2v-14b': {
      repoId: 'Wan-AI/Wan2.1-T2V-14B-Diffusers',
      supportedLoras: [
        {
          id: 'general',
          label: 'General Wan2.1 LoRA',
          path: 'NIVEDAN/wan2.1-lora'
        }
      ]
    },
    't2v-1_3b': {
      repoId: 'Wan-AI/Wan2.1-T2V-1.3B-Diffusers',
      supportedLoras: []
    },
    'i2v-14b': {
      repoId: 'Wan-AI/Wan2.1-I2V-14B-720P-Diffusers',
      supportedLoras: [
        {
          id: 'fight',
          label: 'Fight Style LoRA',
          path: 'valiantcat/Wan2.1-Fight-LoRA'
        }
      ]
    }
  };
  // Radio groups rendered by the selector, in display order. Each group has a
  // `name` (the key it writes in the state object), a `title`, and its `items`.
  const options = {
    hardware: {
      name: 'hardware',
      title: 'Hardware Platform',
      items: [
        {
          id: 'mi300x',
          label: 'MI300X/MI325X/MI355X',
          default: true
        }
      ]
    },
    task: {
      name: 'task',
      title: 'Task Type',
      items: [
        {
          id: 't2v',
          label: 'Text-to-Video (T2V)',
          default: true
        },
        {
          id: 'i2v',
          label: 'Image-to-Video (I2V)',
          default: false
        }
      ]
    },
    modelsize: {
      name: 'modelsize',
      title: 'Model Variant',
      // Every variant with the internal `validTasks` field removed.
      items: MODELSIZE_DEFS.map(({validTasks, ...radioItem}) => radioItem)
    },
    bestPractice: {
      name: 'bestPractice',
      title: 'Sequence Parallelism',
      items: [
        {
          id: 'off',
          label: 'Standard',
          default: true
        },
        {
          id: 'on',
          label: 'Best Practice (4 GPUs)',
          default: false
        }
      ]
    }
  };
  // Returns the model-size radio items whose `validTasks` contains `task`,
  // each with the `validTasks` field removed. Order follows MODELSIZE_DEFS.
  function modelSizeItemsForTask(task) {
    const radioItems = [];
    for (const {validTasks, ...radioItem} of MODELSIZE_DEFS) {
      if (validTasks.includes(task)) {
        radioItems.push(radioItem);
      }
    }
    return radioItems;
  }
  /**
   * Builds the selector's initial state from the option tables.
   *
   * Each radio group starts on the item flagged `default: true` (falling back
   * to the group's first item), so the `default` flags in `options` and
   * MODELSIZE_DEFS are the single source of truth instead of being duplicated
   * here as string literals. The model size is chosen among the sizes valid
   * for the default task, and the first LoRA supported by that task/size pair
   * (if any) is pre-selected.
   *
   * @returns {{hardware: string, task: string, modelsize: string,
   *   bestPractice: string, selectedLoraPath: string}} initial state; an
   *   empty `selectedLoraPath` means no LoRA is selected.
   */
  const getInitialState = () => {
    // Id of the item flagged as default, or of the first item when none is.
    const defaultIdOf = items => (items.find(item => item.default) ?? items[0]).id;
    const task = defaultIdOf(options.task.items);
    const modelsize = defaultIdOf(modelSizeItemsForTask(task));
    const supported = modelConfigs[`${task}-${modelsize}`]?.supportedLoras || [];
    return {
      hardware: defaultIdOf(options.hardware.items),
      task,
      modelsize,
      bestPractice: defaultIdOf(options.bestPractice.items),
      selectedLoraPath: supported[0]?.path ?? ''
    };
  };
  // Current choice for every option group plus the selected LoRA path.
  const [values, setValues] = useState(getInitialState);
  // Tracks whether the page is in dark mode so inline styles can adapt.
  const [isDark, setIsDark] = useState(false);
  useEffect(() => {
    // Dark mode is considered active when any one of these signals is set on <html>.
    const syncTheme = () => {
      const root = document.documentElement;
      const darkSignals = [
        () => root.classList.contains('dark'),
        () => root.getAttribute('data-theme') === 'dark',
        () => root.style.colorScheme === 'dark'
      ];
      setIsDark(darkSignals.some(isSet => isSet()));
    };
    syncTheme();
    // Re-evaluate whenever one of the watched <html> attributes changes.
    const themeWatcher = new MutationObserver(syncTheme);
    themeWatcher.observe(document.documentElement, {
      attributes: true,
      attributeFilter: ['class', 'data-theme', 'style']
    });
    // Stop observing when the component unmounts.
    return () => themeWatcher.disconnect();
  }, []);
  // Applies a radio selection and keeps dependent fields consistent:
  //  - changing the task may force the model size onto one valid for that task;
  //  - changing the task or model size clears the LoRA when the new model has
  //    none, or swaps a no-longer-supported LoRA for the first supported one.
  // An empty LoRA selection is left empty.
  const handleRadioChange = (optionName, itemId) => {
    const affectsModel = optionName === 'task' || optionName === 'modelsize';
    setValues(current => {
      const updated = {
        ...current,
        [optionName]: itemId
      };
      if (!affectsModel) {
        return updated;
      }
      if (optionName === 'task') {
        const allowedSizes = modelSizeItemsForTask(itemId);
        const sizeStillValid = allowedSizes.some(({id}) => id === updated.modelsize);
        if (!sizeStillValid) {
          const preferred = allowedSizes.find(entry => entry.default);
          updated.modelsize = preferred?.id || allowedSizes[0].id;
        }
      }
      const loras = modelConfigs[`${updated.task}-${updated.modelsize}`]?.supportedLoras || [];
      if (loras.length === 0) {
        updated.selectedLoraPath = '';
        return updated;
      }
      const chosen = updated.selectedLoraPath;
      const chosenIsStale = Boolean(chosen) && loras.every(({path}) => path !== chosen);
      if (chosenIsStale) {
        updated.selectedLoraPath = loras[0].path;
      }
      return updated;
    });
  };
  // Selects the LoRA at `path`, or clears the selection when that LoRA is
  // already the selected one.
  const handleLoraToggle = path => {
    setValues(current => {
      const alreadySelected = current.selectedLoraPath === path;
      return {
        ...current,
        selectedLoraPath: alreadySelected ? '' : path
      };
    });
  };
  // Stores `value` under `optionName` in the selector state.
  // NOTE(review): nothing in this component calls this handler.
  const handleTextChange = (optionName, value) => {
    setValues(current => {
      const patch = {[optionName]: value};
      return {
        ...current,
        ...patch
      };
    });
  };
  // Renders the `sglang serve` command for the current selection, one argument
  // per line joined with shell line continuations. Returns an error comment
  // when the task/model-size pair has no entry in modelConfigs.
  const generateCommand = () => {
    const config = modelConfigs[`${values.task}-${values.modelsize}`];
    if (!config) {
      return '# Error: Invalid configuration';
    }
    const parts = [
      'sglang serve',
      `--model-path ${config.repoId}`,
      '--dit-layerwise-offload true'
    ];
    if (values.bestPractice === 'on') {
      parts.push('--num-gpus 4', '--ulysses-degree 2', '--enable-cfg-parallel');
    }
    if (values.selectedLoraPath) {
      parts.push(`--lora-path ${values.selectedLoraPath}`);
    }
    // Space, backslash, newline, then a two-space indent for the next argument.
    return parts.join(' \\\n  ');
  };
  // Values derived from the current state on every render.
  // Model sizes that are valid for the selected task.
  const modelSizeItems = modelSizeItemsForTask(values.task);
  // LoRAs offered for the selected task/model-size pair (empty when none).
  const availableLoras = modelConfigs[`${values.task}-${values.modelsize}`]?.supportedLoras || [];
  // Command text shown in the "Run this Command" card.
  const command = generateCommand();
  // Inline style objects for the selector. Colors switch on `isDark`.
  // The previously declared `disabledStyle` and `textInputStyle` were never
  // referenced by the component and have been removed.
  // Outer wrapper: centered column of cards.
  const containerStyle = {
    maxWidth: '900px',
    margin: '0 auto',
    display: 'flex',
    flexDirection: 'column',
    gap: '4px'
  };
  // One card per option group (title on the left, choices on the right).
  const cardStyle = {
    padding: '8px 12px',
    border: `1px solid ${isDark ? '#374151' : '#e5e7eb'}`,
    borderLeft: `3px solid ${isDark ? '#E85D4D' : '#D45D44'}`,
    borderRadius: '4px',
    display: 'flex',
    alignItems: 'center',
    gap: '12px',
    background: isDark ? '#1f2937' : '#fff'
  };
  // Fixed-width title column of a card.
  const titleStyle = {
    fontSize: '13px',
    fontWeight: '600',
    minWidth: '140px',
    flexShrink: 0,
    color: isDark ? '#e5e7eb' : 'inherit'
  };
  // Wrapping row that holds a card's choice buttons.
  const itemsStyle = {
    display: 'flex',
    rowGap: '2px',
    columnGap: '6px',
    flexWrap: 'wrap',
    alignItems: 'center',
    flex: 1
  };
  // Unselected choice button (a <label> wrapping a hidden radio input).
  const labelBaseStyle = {
    padding: '4px 10px',
    border: `1px solid ${isDark ? '#9ca3af' : '#d1d5db'}`,
    borderRadius: '3px',
    cursor: 'pointer',
    display: 'inline-flex',
    flexDirection: 'column',
    alignItems: 'center',
    justifyContent: 'center',
    fontWeight: '500',
    fontSize: '13px',
    transition: 'all 0.2s',
    userSelect: 'none',
    minWidth: '45px',
    textAlign: 'center',
    flex: 1,
    background: isDark ? '#374151' : '#fff',
    color: isDark ? '#e5e7eb' : 'inherit'
  };
  // Overrides spread on top of labelBaseStyle for the selected choice.
  const checkedStyle = {
    background: '#D45D44',
    color: 'white',
    borderColor: '#D45D44'
  };
  // Small secondary line under a choice label.
  const subtitleStyle = {
    display: 'block',
    fontSize: '9px',
    marginTop: '1px',
    lineHeight: '1.1',
    opacity: 0.7
  };
  // <pre> block showing the generated command.
  const commandDisplayStyle = {
    flex: 1,
    padding: '12px 16px',
    background: isDark ? '#111827' : '#f5f5f5',
    borderRadius: '6px',
    fontFamily: "'Menlo', 'Monaco', 'Courier New', monospace",
    fontSize: '12px',
    lineHeight: '1.5',
    color: isDark ? '#e5e7eb' : '#374151',
    whiteSpace: 'pre-wrap',
    overflowX: 'auto',
    margin: 0,
    border: `1px solid ${isDark ? '#374151' : '#e5e7eb'}`
  };
  // Render: one card per entry in `options` (radio groups), an optional LoRA
  // card shown only when the current model has supported LoRAs, and a final
  // card with the generated command. The 'modelsize' group is rendered from
  // `modelSizeItems` (sizes valid for the current task) rather than from
  // `option.items`.
  return <div style={containerStyle} className="not-prose">
      {Object.entries(options).map(([key, option]) => <div key={key} style={cardStyle}>
          <div style={titleStyle}>{option.title}</div>
          <div style={itemsStyle}>
            {(key === 'modelsize' ? modelSizeItems : option.items).map(item => {
    // The radio input itself is hidden (display: 'none'); the styled label is
    // what the user sees, highlighted when its item is the current value.
    const isChecked = values[option.name] === item.id;
    return <label key={item.id} style={{
      ...labelBaseStyle,
      ...isChecked ? checkedStyle : {}
    }}>
                  <input type="radio" name={option.name} value={item.id} checked={isChecked} onChange={() => handleRadioChange(option.name, item.id)} style={{
      display: 'none'
    }} />
                  {item.label}
                  {item.subtitle && <small style={{
      ...subtitleStyle,
      color: isChecked ? 'rgba(255,255,255,0.85)' : 'inherit'
    }}>
                      {item.subtitle}
                    </small>}
                </label>;
  })}
          </div>
        </div>)}

      {availableLoras.length > 0 && <div style={cardStyle}>
          <div style={titleStyle}>Select LoRA Model (Only some of the supported LoRAs are listed here)</div>
          <div style={itemsStyle}>
            {availableLoras.map(lora => {
    // A LoRA entry is highlighted when its path is the selected one.
    const isChecked = values.selectedLoraPath === lora.path;
    return <label key={lora.id} style={{
      ...labelBaseStyle,
      ...isChecked ? checkedStyle : {}
    }} onClick={event => {
      // Handle the click on the label itself so that clicking the selected
      // LoRA again clears it (see handleLoraToggle); the input is read-only.
      event.preventDefault();
      handleLoraToggle(lora.path);
    }}>
                  <input type="radio" name="selectedLoraPath" value={lora.path} checked={isChecked} readOnly style={{
      display: 'none'
    }} />
                  {lora.label}
                  <small style={{
      ...subtitleStyle,
      color: isChecked ? 'rgba(255,255,255,0.85)' : 'inherit'
    }}>
                    {lora.path}
                  </small>
                </label>;
  })}
          </div>
        </div>}

      <div style={cardStyle}>
        <div style={titleStyle}>Run this Command:</div>
        <pre style={commandDisplayStyle}>{command}</pre>
      </div>
    </div>;
};

## 1. Model Introduction

[Wan2.1 series](https://github.com/Wan-Video/Wan2.1) is an open and advanced suite of large-scale video generative models from Wan-AI.

Key characteristics:

* **State-of-the-art video quality**: Consistently outperforms many open-source and commercial video models on internal and public benchmarks, especially for motion richness and temporal consistency.
* **Consumer GPU friendly**: The T2V-1.3B variant can generate 5-second 480P videos on consumer GPUs with modest VRAM requirements.
* **Multi-capability suite**: Supports Text-to-Video (T2V), Image-to-Video (I2V), video editing, text-to-image, and video-to-audio generation.
* **Robust text rendering**: First-generation Wan model capable of generating both Chinese and English text in videos with strong readability.
* **Powerful Wan-VAE**: A 3D causal VAE that encodes/decodes long 1080P videos while preserving temporal information, enabling efficient high-resolution video generation.

For more details, refer to the official Wan2.1 resources:

* **GitHub**: [Wan-Video/Wan2.1](https://github.com/Wan-Video/Wan2.1)
* **Hugging Face model**: [Wan-AI/Wan2.1-T2V-14B](https://huggingface.co/Wan-AI/Wan2.1-T2V-14B)

## 2. SGLang-diffusion Installation

SGLang-diffusion offers multiple installation methods. You can choose the most suitable installation method based on your hardware platform and requirements.

Please refer to the [official SGLang-diffusion installation guide](../../../docs/sglang-diffusion/installation) for installation instructions.

## 3. Model Deployment

This section provides deployment configurations optimized for different hardware platforms and use cases.

### 3.1 Basic Configuration

The Wan2.1 series offers models in multiple sizes and resolutions, optimized for different hardware platforms. The recommended launch configurations vary by hardware and model size.

**Interactive Command Generator**: Use the configuration selector below to automatically generate an appropriate deployment command for your model variant and options.

<Wan21Deployment />

### 3.2 Configuration Tips

Currently supported optimization options are listed in the [SGLang diffusion support matrix](../../../docs/sglang-diffusion/attention_backends#platform-support-matrix).

* `--vae-path`: Path to a custom VAE model or HuggingFace model ID. If not specified, the VAE will be loaded from the main model path.
* `--num-gpus {NUM_GPUS}`: Number of GPUs to use.
* `--tp-size {TP_SIZE}`: Tensor parallelism size (for the encoder/DiT; keep it ≤ 1 if relying heavily on CPU offload).
* `--sp-degree {SP_SIZE}`: Sequence parallelism degree.
* `--ulysses-degree {ULYSSES_DEGREE}`: Degree of DeepSpeed-Ulysses-style SP in USP.
* `--ring-degree {RING_DEGREE}`: Degree of ring attention-style SP in USP.
* `--text-encoder-cpu-offload`, `--dit-cpu-offload`, `--vae-cpu-offload`: Use CPU offload to reduce peak GPU memory when needed.

## 4. Model Invocation

### 4.1 Basic Usage

For more API usage and request examples, please refer to:
[SGLang Diffusion OpenAI API](../../../docs/sglang-diffusion/api/openai_api)

#### 4.1.1 Launch a server and then send requests

```bash Command theme={null}
sglang serve --model-path Wan-AI/Wan2.1-T2V-14B-Diffusers --port 30000

curl http://127.0.0.1:30000/v1/images/generations \
  -o >(jq -r '.data[0].b64_json' | base64 --decode > example.png) \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer $OPENAI_API_KEY" \
  -d '{
    "model": "Wan-AI/Wan2.1-T2V-14B-Diffusers",
    "prompt": "A cute baby sea otter",
    "n": 1,
    "size": "1024x1024",
    "response_format": "b64_json"
  }'
```

#### 4.1.2 Generate a video without launching a server

```bash Command theme={null}
SERVER_ARGS=(
  --model-path Wan-AI/Wan2.1-T2V-14B-Diffusers
  --text-encoder-cpu-offload
  --pin-cpu-memory
  --num-gpus 4
  --ulysses-degree=2
  --enable-cfg-parallel
)

SAMPLING_ARGS=(
  --prompt "A curious raccoon"
  --save-output
  --output-path outputs
  --output-file-name "A curious raccoon.mp4"
)

sglang generate "${SERVER_ARGS[@]}" "${SAMPLING_ARGS[@]}"
```

### 4.2 Advanced Usage

#### 4.2.1 Cache-DiT Acceleration

SGLang integrates [Cache-DiT](https://github.com/vipshop/cache-dit), a caching acceleration engine for Diffusion Transformers (DiT), to achieve significant inference speedups with minimal quality loss. To enable it, set the environment variable `SGLANG_CACHE_DIT_ENABLED=true`. For more details, please refer to the SGLang Cache-DiT [documentation](../../../docs/sglang-diffusion/cache_dit).

**Basic Usage**

```bash Command theme={null}
SGLANG_CACHE_DIT_ENABLED=true sglang serve --model-path Wan-AI/Wan2.1-T2V-14B-Diffusers
```

**Advanced Usage**

Combined Configuration Example:

```bash Command theme={null}
SGLANG_CACHE_DIT_ENABLED=true \
SGLANG_CACHE_DIT_FN=2 \
SGLANG_CACHE_DIT_BN=1 \
SGLANG_CACHE_DIT_WARMUP=4 \
SGLANG_CACHE_DIT_RDT=0.4 \
SGLANG_CACHE_DIT_MC=4 \
SGLANG_CACHE_DIT_TAYLORSEER=true \
SGLANG_CACHE_DIT_TS_ORDER=2 \
sglang serve --model-path Wan-AI/Wan2.1-T2V-14B-Diffusers
```

#### 4.2.2 GPU Optimization

* `--dit-cpu-offload`: Use CPU offload for DiT inference. Enable if you run out of memory with FSDP.
* `--text-encoder-cpu-offload`: Use CPU offload for text encoder inference.
* `--image-encoder-cpu-offload`: Use CPU offload for image encoder inference.
* `--vae-cpu-offload`: Use CPU offload for VAE.
* `--pin-cpu-memory`: Pin memory for CPU offload. Use as a workaround if you see "CUDA error: invalid argument".

#### 4.2.3 Supported LoRA Registry

SGLang supports applying Wan2.1 LoRA adapters on top of base models:

<table style={{width: "100%", borderCollapse: "collapse", tableLayout: "fixed"}}>
  <colgroup>
    <col style={{width: "50%"}} />

    <col style={{width: "50%"}} />
  </colgroup>

  <thead>
    <tr style={{borderBottom: "2px solid #d55816"}}>
      <th style={{textAlign: "left", padding: "10px 12px", fontWeight: 700, whiteSpace: "nowrap", backgroundColor: "rgba(255,255,255,0.02)"}}>Base model</th>
      <th style={{textAlign: "left", padding: "10px 12px", fontWeight: 700, whiteSpace: "nowrap", backgroundColor: "rgba(255,255,255,0.05)"}}>Supported LoRA</th>
    </tr>
  </thead>

  <tbody>
    <tr>
      <td style={{padding: "9px 12px", fontWeight: 500, backgroundColor: "rgba(255,255,255,0.02)"}}>[Wan-AI/Wan2.1-T2V-14B](https://huggingface.co/Wan-AI/Wan2.1-T2V-14B)</td>
      <td style={{padding: "9px 12px", backgroundColor: "rgba(255,255,255,0.05)"}}>[NIVEDAN/wan2.1-lora](https://huggingface.co/NIVEDAN/wan2.1-lora)</td>
    </tr>

    <tr>
      <td style={{padding: "9px 12px", fontWeight: 500, backgroundColor: "rgba(255,255,255,0.02)"}}>[Wan-AI/Wan2.1-I2V-14B-720P](https://huggingface.co/Wan-AI/Wan2.1-I2V-14B-720P)</td>
      <td style={{padding: "9px 12px", backgroundColor: "rgba(255,255,255,0.05)"}}>[valiantcat/Wan2.1-Fight-LoRA](https://huggingface.co/valiantcat/Wan2.1-Fight-LoRA)</td>
    </tr>
  </tbody>
</table>

**Example**:

```bash Command theme={null}
sglang serve --model-path Wan-AI/Wan2.1-T2V-14B-Diffusers --port 30000 \
    --lora-path NIVEDAN/wan2.1-lora
```

## 5. Benchmark

Test Environment:

* Hardware: AMD MI300X GPU (1x)
* Model: Wan-AI/Wan2.1-T2V-14B-Diffusers
* SGLang Docker Image Version: 0.5.9

### 5.1 How to Run Benchmarks with SGLang

You can use the built-in SGLang diffusion benchmark script to evaluate Wan2.1 performance on your hardware.

#### 5.1.1 Generate a single video

**Server Command**:

```bash Command theme={null}
sglang serve --model-path Wan-AI/Wan2.1-T2V-14B-Diffusers
```

**Benchmark Command**:

```bash Command theme={null}
python3 -m sglang.multimodal_gen.benchmarks.bench_serving \
    --backend sglang-video --dataset vbench --task text-to-video --num-prompts 1 --max-concurrency 1
```

**Result**:

```text Output theme={null}
================= Serving Benchmark Result =================
Task:                                    text-to-video
Model:                                   Wan-AI/Wan2.1-T2V-14B-Diffusers
Dataset:                                 vbench
--------------------------------------------------
Benchmark duration (s):                  1958.41
Request rate:                            inf
Max request concurrency:                 1
Successful requests:                     1/1
--------------------------------------------------
Request throughput (req/s):              0.00
Latency Mean (s):                        1958.4059
Latency Median (s):                      1958.4059
Latency P99 (s):                         1958.4059
--------------------------------------------------
Peak Memory Max (MB):                    59662.00
Peak Memory Mean (MB):                   59662.00
Peak Memory Median (MB):                 59662.00
============================================================
```

#### 5.1.2 Generate videos with Cache-DiT acceleration

**Server Command**:

```bash Command theme={null}
SGLANG_CACHE_DIT_ENABLED=true \
SGLANG_CACHE_DIT_FN=2 \
SGLANG_CACHE_DIT_BN=1 \
SGLANG_CACHE_DIT_WARMUP=4 \
SGLANG_CACHE_DIT_RDT=0.4 \
SGLANG_CACHE_DIT_MC=4 \
SGLANG_CACHE_DIT_TAYLORSEER=true \
SGLANG_CACHE_DIT_TS_ORDER=2 \
sglang serve --model-path Wan-AI/Wan2.1-T2V-14B-Diffusers
```

**Benchmark Command**:

```bash Command theme={null}
python3 -m sglang.multimodal_gen.benchmarks.bench_serving \
    --backend sglang-video --dataset vbench --task text-to-video --num-prompts 1 --max-concurrency 1
```

**Result**:

```text Output theme={null}
================= Serving Benchmark Result =================
Task:                                    text-to-video
Model:                                   Wan-AI/Wan2.1-T2V-14B-Diffusers
Dataset:                                 vbench
--------------------------------------------------
Benchmark duration (s):                  556.99
Request rate:                            inf
Max request concurrency:                 1
Successful requests:                     1/1
--------------------------------------------------
Request throughput (req/s):              0.00
Latency Mean (s):                        556.9885
Latency Median (s):                      556.9885
Latency P99 (s):                         556.9885
--------------------------------------------------
Peak Memory Max (MB):                    69306.00
Peak Memory Mean (MB):                   69306.00
Peak Memory Median (MB):                 69306.00
============================================================
```
