> ## Documentation Index
> Fetch the complete documentation index at: https://docs.sglang.io/llms.txt
> Use this file to discover all available pages before exploring further.

# Ring-2.5-1T

export const Ring251TDeployment = () => {
  const options = {
    hardware: {
      name: 'hardware',
      title: 'Hardware Platform',
      items: [{
        id: 'h200',
        label: 'H200',
        default: true
      }, {
        id: 'b200',
        label: 'B200',
        default: false
      }, {
        id: 'b300',
        label: 'B300',
        default: false
      }, {
        id: 'gb200',
        label: 'GB200',
        default: false
      }, {
        id: 'gb300',
        label: 'GB300',
        default: false
      }, {
        id: 'mi300x',
        label: 'MI300X',
        default: false
      }, {
        id: 'mi325x',
        label: 'MI325X',
        default: false
      }, {
        id: 'mi355x',
        label: 'MI355X',
        default: false
      }]
    },
    reasoning: {
      name: 'reasoning',
      title: 'Reasoning Parser',
      items: [{
        id: 'disabled',
        label: 'Disabled',
        default: true
      }, {
        id: 'enabled',
        label: 'Enabled',
        default: false
      }]
    },
    toolcall: {
      name: 'toolcall',
      title: 'Tool Call Parser',
      items: [{
        id: 'disabled',
        label: 'Disabled',
        default: true
      }, {
        id: 'enabled',
        label: 'Enabled',
        default: false
      }]
    }
  };
  const modelConfigs = {
    h200: {
      fp8: {
        tp: 8
      }
    },
    b200: {
      fp8: {
        tp: 8
      }
    },
    b300: {
      fp8: {
        tp: 8
      }
    },
    gb200: {
      fp8: {
        tp: 4
      }
    },
    gb300: {
      fp8: {
        tp: 4
      }
    },
    mi300x: {
      fp8: {
        tp: 8,
        pp: 2,
        nnodes: 2
      }
    },
    mi325x: {
      fp8: {
        tp: 8,
        pp: 2,
        nnodes: 2
      }
    },
    mi355x: {
      fp8: {
        tp: 8
      }
    }
  };
  const getInitialState = () => {
    const initialState = {};
    Object.entries(options).forEach(([key, option]) => {
      if (option.type === 'checkbox') {
        initialState[key] = option.items.filter(item => item.default).map(item => item.id);
      } else {
        const defaultItem = option.items.find(item => item.default);
        initialState[key] = defaultItem ? defaultItem.id : option.items[0].id;
      }
    });
    return initialState;
  };
  const [values, setValues] = useState(getInitialState);
  const [isDark, setIsDark] = useState(false);
  useEffect(() => {
    const checkDarkMode = () => {
      const html = document.documentElement;
      const isDarkMode = html.classList.contains('dark') || html.getAttribute('data-theme') === 'dark' || html.style.colorScheme === 'dark';
      setIsDark(isDarkMode);
    };
    checkDarkMode();
    const observer = new MutationObserver(checkDarkMode);
    observer.observe(document.documentElement, {
      attributes: true,
      attributeFilter: ['class', 'data-theme', 'style']
    });
    return () => observer.disconnect();
  }, []);
  const handleRadioChange = (optionName, value) => {
    setValues(prev => ({
      ...prev,
      [optionName]: value
    }));
  };
  const handleCheckboxChange = (optionName, itemId, isChecked) => {
    setValues(prev => {
      const currentValues = prev[optionName] || [];
      if (isChecked) {
        return {
          ...prev,
          [optionName]: [...currentValues, itemId]
        };
      } else {
        return {
          ...prev,
          [optionName]: currentValues.filter(id => id !== itemId)
        };
      }
    });
  };
  const generateCommand = () => {
    const {hardware, reasoning, toolcall} = values;
    const modelName = 'inclusionAI/Ring-2.5-1T';
    const amdMultiNode = hardware === 'mi300x' || hardware === 'mi325x';
    const extraFlags = [];
    if (reasoning === 'enabled') extraFlags.push('--reasoning-parser deepseek-r1');
    if (toolcall === 'enabled') extraFlags.push('--tool-call-parser qwen');
    if (amdMultiNode) {
      const hwConfig = modelConfigs[hardware].fp8;
      const tpSize = hwConfig.tp;
      const ppSize = hwConfig.pp;
      const buildAmdNodeCmd = nodeRank => {
        let cmd = 'sglang serve \\\n';
        cmd += `--model-path ${modelName} \\\n`;
        cmd += '--trust-remote-code \\\n';
        cmd += `--tp-size ${tpSize} \\\n`;
        cmd += `--pp-size ${ppSize} \\\n`;
        cmd += `--nnodes ${hwConfig.nnodes} \\\n`;
        cmd += `--node-rank ${nodeRank} \\\n`;
        if (nodeRank === 0) {
          cmd += '--host 0.0.0.0 \\\n';
          cmd += '--port 30000 \\\n';
        }
        cmd += '--dist-init-addr ${MASTER_IP}:${DIST_PORT} \\\n';
        cmd += '--attention-backend triton \\\n';
        cmd += '--model-loader-extra-config \'{"enable_multithread_load": "true","num_threads": 64}\' \\\n';
        cmd += '--mem-frac 0.95';
        extraFlags.forEach(flag => {
          cmd += ` \\\n${flag}`;
        });
        return cmd;
      };
      const envBlock = 'export MASTER_IP=<your-node0-ip> # Replace with the IP of Node 0\n' + 'export PORT=30000\n' + 'export DIST_PORT=20000\n' + '# Replace <nic-ifname> with your actual NIC interface name\n' + 'export GLOO_SOCKET_IFNAME=<nic-ifname>\n' + 'export TP_SOCKET_IFNAME=<nic-ifname>\n';
      let out = envBlock + '\n';
      out += '\n# Node 0:\n';
      out += buildAmdNodeCmd(0);
      out += '\n\n\n# Node 1:\n';
      out += buildAmdNodeCmd(1);
      return out;
    }
    const hwConfig = modelConfigs[hardware].fp8;
    const tpValue = hwConfig.tp;
    let cmd = 'sglang serve \\\n';
    cmd += `  --model-path ${modelName}`;
    cmd += ` \\\n  --tp ${tpValue}`;
    cmd += ' \\\n  --trust-remote-code';
    if (hardware === 'b300') {
      cmd += ' \\\n  --attention-backend flashinfer';
    }
    extraFlags.forEach(flag => {
      cmd += ` \\\n  ${flag}`;
    });
    return cmd;
  };
  const containerStyle = {
    maxWidth: '900px',
    margin: '0 auto',
    display: 'flex',
    flexDirection: 'column',
    gap: '4px'
  };
  const cardStyle = {
    padding: '8px 12px',
    border: `1px solid ${isDark ? '#374151' : '#e5e7eb'}`,
    borderLeft: `3px solid ${isDark ? '#E85D4D' : '#D45D44'}`,
    borderRadius: '4px',
    display: 'flex',
    alignItems: 'center',
    gap: '12px',
    background: isDark ? '#1f2937' : '#fff'
  };
  const titleStyle = {
    fontSize: '13px',
    fontWeight: '600',
    minWidth: '140px',
    flexShrink: 0,
    color: isDark ? '#e5e7eb' : 'inherit'
  };
  const itemsStyle = {
    display: 'flex',
    rowGap: '2px',
    columnGap: '6px',
    flexWrap: 'wrap',
    alignItems: 'center',
    flex: 1
  };
  const labelBaseStyle = {
    padding: '4px 10px',
    border: `1px solid ${isDark ? '#9ca3af' : '#d1d5db'}`,
    borderRadius: '3px',
    cursor: 'pointer',
    display: 'inline-flex',
    flexDirection: 'column',
    alignItems: 'center',
    justifyContent: 'center',
    fontWeight: '500',
    fontSize: '13px',
    transition: 'all 0.2s',
    userSelect: 'none',
    minWidth: '45px',
    textAlign: 'center',
    flex: 1,
    background: isDark ? '#374151' : '#fff',
    color: isDark ? '#e5e7eb' : 'inherit'
  };
  const checkedStyle = {
    background: '#D45D44',
    color: 'white',
    borderColor: '#D45D44'
  };
  const disabledStyle = {
    cursor: 'not-allowed',
    opacity: 0.5
  };
  const subtitleStyle = {
    display: 'block',
    fontSize: '9px',
    marginTop: '1px',
    lineHeight: '1.1',
    opacity: 0.7
  };
  const commandDisplayStyle = {
    flex: 1,
    padding: '12px 16px',
    background: isDark ? '#111827' : '#f5f5f5',
    borderRadius: '6px',
    fontFamily: "'Menlo', 'Monaco', 'Courier New', monospace",
    fontSize: '12px',
    lineHeight: '1.5',
    color: isDark ? '#e5e7eb' : '#374151',
    whiteSpace: 'pre-wrap',
    overflowX: 'auto',
    margin: 0,
    border: `1px solid ${isDark ? '#374151' : '#e5e7eb'}`
  };
  return <div style={containerStyle} className="not-prose">
      {Object.entries(options).map(([key, option]) => <div key={key} style={cardStyle}>
          <div style={titleStyle}>{option.title}</div>
          <div style={itemsStyle}>
            {option.type === 'checkbox' ? option.items.map(item => {
    const isChecked = (values[option.name] || []).includes(item.id);
    const isItemDisabled = item.required;
    return <label key={item.id} style={{
      ...labelBaseStyle,
      ...isChecked ? checkedStyle : {},
      ...isItemDisabled ? disabledStyle : {}
    }}>
                    <input type="checkbox" checked={isChecked} disabled={isItemDisabled} onChange={e => handleCheckboxChange(option.name, item.id, e.target.checked)} style={{
      display: 'none'
    }} />
                    {item.label}
                    {item.subtitle && <small style={{
      ...subtitleStyle,
      color: isChecked ? 'rgba(255,255,255,0.85)' : 'inherit'
    }}>{item.subtitle}</small>}
                  </label>;
  }) : option.items.map(item => {
    const isChecked = values[option.name] === item.id;
    return <label key={item.id} style={{
      ...labelBaseStyle,
      ...isChecked ? checkedStyle : {}
    }}>
                    <input type="radio" name={option.name} value={item.id} checked={isChecked} onChange={() => handleRadioChange(option.name, item.id)} style={{
      display: 'none'
    }} />
                    {item.label}
                    {item.subtitle && <small style={{
      ...subtitleStyle,
      color: isChecked ? 'rgba(255,255,255,0.85)' : 'inherit'
    }}>{item.subtitle}</small>}
                  </label>;
  })}
          </div>
        </div>)}
      <div style={cardStyle}>
        <div style={titleStyle}>Run this Command:</div>
        <pre style={commandDisplayStyle}>{generateCommand()}</pre>
      </div>
    </div>;
};

## 1. Model Introduction

[Ring-2.5-1T](https://huggingface.co/inclusionAI/Ring-2.5-1T) is the world's first open-source trillion-parameter reasoning model based on hybrid linear attention architecture, developed by InclusionAI. Building on Ring-1T, Ring-2.5-1T demonstrates substantial improvements in generation efficiency, reasoning depth, and long-horizon task execution capabilities.

**Key Features:**

* **Trillion-Scale Model**: \~1T total parameters with 63B activated parameters, using a hybrid linear attention architecture (1:7 MLA + Lightning Linear Attention)
* **Generation Efficiency**: Reduces memory access overhead by over 10x and increases generation throughput by more than 3x for sequences exceeding 32K tokens
* **Deep Reasoning**: Achieves gold medal level for both IMO 2025 and CMO 2025, with dense rewards for rigorous reasoning process feedback
* **Long-horizon Task Execution**: Enhanced autonomous execution capability through large-scale fully-async agentic RL training
* **Tool Calling**: Supports function calling with XML-style tool call format
* **Context Length**: 128K, extendable to 256K with YaRN

**Available Models:**

* **FP8 (8-bit quantized)**: [inclusionAI/Ring-2.5-1T](https://huggingface.co/inclusionAI/Ring-2.5-1T)

**License:** MIT

## 2. SGLang Installation

Ring-2.5-1T runs on the standard SGLang Docker image:

```bash Command theme={null}
# NVIDIA (H200 / B200 / B300 / GB200 / GB300)
docker pull lmsysorg/sglang:latest

# For MI300X/MI325X
docker pull lmsysorg/sglang:v0.5.9-rocm700-mi30x

# For MI355X
docker pull lmsysorg/sglang:v0.5.9-rocm700-mi35x
```

For other installation methods, please refer to the [official SGLang installation guide](../../../docs/get-started/install).

## 3. Model Deployment

This section provides deployment configurations optimized for different hardware platforms.

### 3.1 Basic Configuration

**Interactive Command Generator**: Use the configuration selector below to automatically generate the appropriate deployment command for your hardware platform.

<Ring251TDeployment />

### 3.2 Configuration Tips

* The `--trust-remote-code` flag is required for this model due to custom modeling code.
* The model uses FP8 quantization (compressed-tensors format).

## 4. Model Invocation

Deploy Ring-2.5-1T with the following base command (on H200). The optional reasoning and tool-call parser flags are covered in Section 4.2:

```shell Command theme={null}
sglang serve \
  --model-path inclusionAI/Ring-2.5-1T \
  --tp 8 \
  --trust-remote-code \
  --host 0.0.0.0 \
  --port 30000
```

### 4.1 Basic Usage

For basic API usage and request examples, please refer to:

* [SGLang Basic Usage Guide](../../../docs/basic_usage/send_request)

### 4.2 Advanced Usage

#### 4.2.1 Reasoning Parser

To enable reasoning output separation, add `--reasoning-parser deepseek-r1` when launching the server. The thinking process is returned via `reasoning_content` in the streaming response.

```shell Command theme={null}
sglang serve \
  --model-path inclusionAI/Ring-2.5-1T \
  --tp 8 \
  --trust-remote-code \
  --reasoning-parser deepseek-r1 \
  --host 0.0.0.0 \
  --port 30000
```

```python Example theme={null}
from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:30000/v1",
    api_key="EMPTY"
)

response = client.chat.completions.create(
    model="inclusionAI/Ring-2.5-1T",
    messages=[
        {"role": "user", "content": "Solve this problem step by step: What is 15% of 240?"}
    ],
    max_tokens=2048,
    stream=True
)

for chunk in response:
    if chunk.choices and len(chunk.choices) > 0:
        delta = chunk.choices[0].delta

        if hasattr(delta, 'reasoning_content') and delta.reasoning_content:
            print(delta.reasoning_content, end="", flush=True)

        if delta.content:
            print(delta.content, end="", flush=True)

print()
```

<details>
  <summary>Output Example</summary>

  ```text Output theme={null}
  We are asked: "Solve this problem step by step: What is 15% of 240?" This is a straightforward percentage calculation. We need to show step-by-step solution.

  We can compute 15% of 240 as (15/100)*240 = 0.15 * 240 = 36.

  But we need to present step by step. Also ensure it's clear.

  We could also break down: 10% of 240 = 24, then 5% = 12, so 15% = 36.

  But any method is fine.

  We'll produce a solution with explanation: "To find 15% of 240, multiply 240 by 0.15 (or 15/100)."

  We'll show:

  15% = 15/100 = 0.15

  Then 0.15 × 240 = 36.

  Alternatively: (15/100) × 240 = (15 × 240) / 100 = 3600/100 = 36.

  Finally, answer: 36.

  We can also illustrate stepwise: "First, convert the percentage to a decimal: 15% = 0.15. Then multiply by the number: 0.15 × 240 = 36."

  We'll present as a final answer: \boxed{36}.

  However, we need to provide step-by-step solution as per instructions. We'll write a full explanation.

  We can also use the fraction method: 15% of 240 = (15/100)*240 = (15*240)/100 = 3600/100 = 36.

  Alr.

  I think that's it.


  **Step 1:** Write 15% as a fraction or decimal.
  \[ 15\% = \frac{15}{100} = 0.15\]

  **Step 2:** Multiply the number (240) by this fraction/decimal.
  \[ 240 \times 0.15 = 36\]

  Alternatively, using the fraction:
  \[ \frac{15}{100} \times 240 = \frac{15 \times 240}{100} = \frac{3600}{100} = 36\]

  **Conclusion:** 15% of 240 is 36.

  \[ \boxed{36} \]
  ```
</details>

#### 4.2.2 Tool Calling

To enable tool calling, add `--tool-call-parser qwen` when launching the server.

```shell Command theme={null}
sglang serve \
  --model-path inclusionAI/Ring-2.5-1T \
  --tp 8 \
  --trust-remote-code \
  --tool-call-parser qwen \
  --host 0.0.0.0 \
  --port 30000
```

```python Example theme={null}
from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:30000/v1",
    api_key="EMPTY"
)

tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather for a location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city name"
                    }
                },
                "required": ["location"]
            }
        }
    }
]

response = client.chat.completions.create(
    model="inclusionAI/Ring-2.5-1T",
    messages=[
        {"role": "user", "content": "What's the weather in Beijing?"}
    ],
    tools=tools
)

print(response.choices[0].message.tool_calls)
```

**Output Example:**

```text Output theme={null}
[ChatCompletionMessageFunctionToolCall(id='call_770360e31d194ed79d32cd8c', function=Function(arguments='{"location": "Beijing"}', name='get_weather'), type='function', index=0)]
```

## 5. Benchmark

### GSM8K

* Deployment Command

```bash Command theme={null}
sglang serve \
  --model-path inclusionAI/Ring-2.5-1T \
  --tp-size 8 \
  --trust-remote-code
```

* Benchmark Command

```bash Command theme={null}
python3 benchmark/gsm8k/bench_sglang.py --temperature 1.2 --top-p 0.8 --max-new-tokens 32768 --num-questions 200 --tokenizer-path inclusionAI/Ring-2.5-1T --enable-thinking
```

* Test Result

```text Output theme={null}
Accuracy: 0.955
Invalid: 0.010
Latency: 615.833 s
Output throughput: 412.360 token/s
```