> ## Documentation Index
> Fetch the complete documentation index at: https://docs.sglang.io/llms.txt
> Use this file to discover all available pages before exploring further.

# Intern-S2-Preview

export const InternS2PreviewDeployment = () => {
  const options = {
    hardware: {
      name: 'hardware',
      title: 'Hardware Platform',
      items: [{
        id: 'h200',
        label: 'H200',
        default: true
      }]
    },
    reasoning: {
      name: 'reasoning',
      title: 'Reasoning Parser',
      items: [{
        id: 'disabled',
        label: 'Disabled',
        default: false
      }, {
        id: 'enabled',
        label: 'Enabled',
        default: true
      }]
    },
    toolcall: {
      name: 'toolcall',
      title: 'Tool Call Parser',
      items: [{
        id: 'disabled',
        label: 'Disabled',
        default: false
      }, {
        id: 'enabled',
        label: 'Enabled',
        default: true
      }]
    },
    mtp: {
      name: 'mtp',
      title: 'Multi-Token Prediction',
      items: [{
        id: 'disabled',
        label: 'Disabled',
        default: true
      }, {
        id: 'enabled',
        label: 'Enabled',
        default: false
      }]
    }
  };
  const getInitialState = () => {
    const initialState = {};
    Object.entries(options).forEach(([key, option]) => {
      const defaultItem = option.items.find(item => item.default);
      initialState[key] = defaultItem ? defaultItem.id : option.items[0].id;
    });
    return initialState;
  };
  const [values, setValues] = useState(getInitialState);
  const [isDark, setIsDark] = useState(false);
  useEffect(() => {
    const checkDarkMode = () => {
      const html = document.documentElement;
      const isDarkMode = html.classList.contains('dark') || html.getAttribute('data-theme') === 'dark' || html.style.colorScheme === 'dark';
      setIsDark(isDarkMode);
    };
    checkDarkMode();
    const observer = new MutationObserver(checkDarkMode);
    observer.observe(document.documentElement, {
      attributes: true,
      attributeFilter: ['class', 'data-theme', 'style']
    });
    return () => observer.disconnect();
  }, []);
  const handleRadioChange = (optionName, value) => {
    setValues(prev => ({
      ...prev,
      [optionName]: value
    }));
  };
  const generateCommand = () => {
    const {reasoning, toolcall, mtp} = values;
    const tpValue = 8;
    const flags = [];
    flags.push('  --model-path internLM/Intern-S2-Preview');
    flags.push(`  --tp ${tpValue}`);
    if (reasoning === 'enabled') flags.push('  --reasoning-parser qwen3');
    if (toolcall === 'enabled') flags.push('  --tool-call-parser qwen3_coder');
    if (mtp === 'enabled') {
      flags.push('  --mamba-scheduler-strategy extra_buffer');
      flags.push("  --speculative-algo 'NEXTN'");
      flags.push('  --speculative-eagle-topk 1');
      flags.push('  --speculative-num-steps 3');
      flags.push('  --speculative-num-draft-tokens 4');
    }
    flags.push('  --mem-fraction-static 0.8');
    flags.push('  --host 0.0.0.0');
    flags.push('  --port 30000');
    const envPrefix = mtp === 'enabled' ? 'SGLANG_ENABLE_SPEC_V2=1 \\\n' : '';
    return `${envPrefix}sglang serve \\\n${flags.join(' \\\n')}`;
  };
  const containerStyle = {
    maxWidth: '900px',
    margin: '0 auto',
    display: 'flex',
    flexDirection: 'column',
    gap: '4px'
  };
  const cardStyle = {
    padding: '8px 12px',
    border: `1px solid ${isDark ? '#374151' : '#e5e7eb'}`,
    borderLeft: `3px solid ${isDark ? '#E85D4D' : '#D45D44'}`,
    borderRadius: '4px',
    display: 'flex',
    alignItems: 'center',
    gap: '12px',
    background: isDark ? '#1f2937' : '#fff'
  };
  const titleStyle = {
    fontSize: '13px',
    fontWeight: '600',
    minWidth: '140px',
    flexShrink: 0,
    color: isDark ? '#e5e7eb' : 'inherit'
  };
  const itemsStyle = {
    display: 'flex',
    rowGap: '2px',
    columnGap: '6px',
    flexWrap: 'wrap',
    alignItems: 'center',
    flex: 1
  };
  const labelBaseStyle = {
    padding: '4px 10px',
    border: `1px solid ${isDark ? '#9ca3af' : '#d1d5db'}`,
    borderRadius: '3px',
    cursor: 'pointer',
    display: 'inline-flex',
    flexDirection: 'column',
    alignItems: 'center',
    justifyContent: 'center',
    fontWeight: '500',
    fontSize: '13px',
    transition: 'all 0.2s',
    userSelect: 'none',
    minWidth: '45px',
    textAlign: 'center',
    flex: 1,
    background: isDark ? '#374151' : '#fff',
    color: isDark ? '#e5e7eb' : 'inherit'
  };
  const checkedStyle = {
    background: '#D45D44',
    color: 'white',
    borderColor: '#D45D44'
  };
  const subtitleStyle = {
    display: 'block',
    fontSize: '9px',
    marginTop: '1px',
    lineHeight: '1.1',
    opacity: 0.7
  };
  const commandDisplayStyle = {
    flex: 1,
    padding: '12px 16px',
    background: isDark ? '#111827' : '#f5f5f5',
    borderRadius: '6px',
    fontFamily: "'Menlo', 'Monaco', 'Courier New', monospace",
    fontSize: '12px',
    lineHeight: '1.5',
    color: isDark ? '#e5e7eb' : '#374151',
    whiteSpace: 'pre-wrap',
    overflowX: 'auto',
    margin: 0,
    border: `1px solid ${isDark ? '#374151' : '#e5e7eb'}`
  };
  return <div style={containerStyle} className="not-prose">
      {Object.entries(options).map(([key, option]) => <div key={key} style={cardStyle}>
          <div style={titleStyle}>{option.title}</div>
          <div style={itemsStyle}>
            {option.items.map(item => {
    const isChecked = values[option.name] === item.id;
    return <label key={item.id} style={{
      ...labelBaseStyle,
      ...isChecked ? checkedStyle : {}
    }}>
                  <input type="radio" name={option.name} value={item.id} checked={isChecked} onChange={() => handleRadioChange(option.name, item.id)} style={{
      display: 'none'
    }} />
                  {item.label}
                  {item.subtitle && <small style={{
      ...subtitleStyle,
      color: isChecked ? 'rgba(255,255,255,0.85)' : 'inherit'
    }}>
                      {item.subtitle}
                    </small>}
                </label>;
  })}
          </div>
        </div>)}
      <div style={cardStyle}>
        <div style={titleStyle}>Run this Command:</div>
        <pre style={commandDisplayStyle}>{generateCommand()}</pre>
      </div>
    </div>;
};

## 1. Model Introduction

**Intern-S2-Preview** is an efficient 35B scientific multimodal foundation model. Beyond conventional parameter and data scaling, Intern-S2-Preview explores task scaling: increasing the difficulty, diversity, and coverage of scientific tasks to further unlock model capabilities.

**Resources:**

* HuggingFace: [internLM/Intern-S2-Preview](https://huggingface.co/internLM/Intern-S2-Preview)

## 2. SGLang Installation

SGLang offers multiple installation methods. Please refer to the [official SGLang installation guide](../../../docs/get-started/install) for installation instructions.

Install SGLang from source or use an NVIDIA Docker image:

```bash Command theme={null}
# Install SGLang from source, straight from the GitHub repository
# (the `#subdirectory=python` fragment selects the repo's `python/` subdirectory)
uv pip install 'git+https://github.com/sgl-project/sglang.git#subdirectory=python'

# Or pull the SGLang Docker image for NVIDIA GPUs
docker pull lmsysorg/sglang:latest
```

For details on launching the Docker image, see [Install → Method 3: Using Docker](../../../docs/get-started/install#method-3-using-docker). A minimal example follows; replace the inner `sglang serve ...` placeholder with the arguments produced by the [command generator](#3-model-deployment) below:

```bash Command theme={null}
# Run the SGLang image with all GPUs exposed, port 30000 published, the host
# Hugging Face cache mounted into the container, and HF_TOKEN passed through.
# Replace `<use args below>` with the arguments from the command generator.
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<your-hf-token>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    sglang serve <use args below>
```

## 3. Model Deployment

### 3.1 Basic Configuration

**Interactive Command Generator**: Use the selector below to generate the deployment command for your hardware, parser, and Multi-Token Prediction (MTP) settings.

<InternS2PreviewDeployment />

### 3.2 Configuration Tips

* Use a tensor-parallel size of at least 2 (`--tp 2` or higher) for the NVIDIA deployment commands; the command generator above uses `--tp 8`.
* Use `--reasoning-parser qwen3` to separate reasoning content from final content in streaming responses.
* Use `--tool-call-parser qwen3_coder` when serving tool-calling workloads.
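* To enable MTP, set `SGLANG_ENABLE_SPEC_V2=1` and add `--mamba-scheduler-strategy extra_buffer` together with `--speculative-algo 'NEXTN'` and the accompanying `--speculative-*` options shown by the command generator.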
* If weight loading is slow, add `--model-loader-extra-config='{"enable_multithread_load": "true", "num_threads": 64}'`.

## 4. Model Invocation

### 4.1 Basic Usage

For basic API usage and request examples, see:

* [Basic API Usage](../../../docs/basic_usage/send_request)

### 4.2 Advanced Usage

#### 4.2.1 Vision Input

Intern-S2-Preview supports image inputs. Here is an example with an image:

```python Example theme={null}
from openai import OpenAI

# Point the OpenAI client at the locally served model.
client = OpenAI(base_url="http://localhost:30000/v1", api_key="EMPTY")

# The user turn carries two content parts: the image first, then the prompt.
image_part = {
    "type": "image_url",
    "image_url": {
        "url": "https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/tests/data/tiger.jpeg"
    },
}
text_part = {
    "type": "text",
    "text": "Describe this image in detail.",
}

stream = client.chat.completions.create(
    model="internLM/Intern-S2-Preview",
    messages=[{"role": "user", "content": [image_part, text_part]}],
    max_tokens=2048,
    stream=True,
)

seen_reasoning = False  # a reasoning chunk has been printed
seen_answer = False  # the "Content" banner has been printed

for chunk in stream:
    # Skip chunks that carry no choices.
    if not chunk.choices:
        continue
    delta = chunk.choices[0].delta

    # Read the attribute defensively; it may be missing or empty on a delta.
    reasoning_text = getattr(delta, "reasoning_content", None)
    if reasoning_text:
        if not seen_reasoning:
            print("=============== Thinking =================", flush=True)
            seen_reasoning = True
        print(reasoning_text, end="", flush=True)

    if delta.content:
        # Print the separator once, and only if reasoning output came first.
        if seen_reasoning and not seen_answer:
            print("\n=============== Content =================", flush=True)
            seen_answer = True
        print(delta.content, end="", flush=True)

print()
```

#### 4.2.2 Reasoning Parser

Serve with `--reasoning-parser qwen3` enabled, then stream the response to read reasoning content separately from the final answer:

```python Example theme={null}
from openai import OpenAI

# Point the OpenAI client at the locally served model.
client = OpenAI(base_url="http://localhost:30000/v1", api_key="EMPTY")

prompt = "Solve this step by step: What is 15% of 240?"

stream = client.chat.completions.create(
    model="internLM/Intern-S2-Preview",
    messages=[{"role": "user", "content": prompt}],
    max_tokens=2048,
    stream=True,
)

seen_reasoning = False  # a reasoning chunk has been printed
seen_answer = False  # the "Content" banner has been printed

for chunk in stream:
    # Skip chunks that carry no choices.
    if not chunk.choices:
        continue
    delta = chunk.choices[0].delta

    # Read the attribute defensively; it may be missing or empty on a delta.
    reasoning_text = getattr(delta, "reasoning_content", None)
    if reasoning_text:
        if not seen_reasoning:
            print("=============== Thinking =================", flush=True)
            seen_reasoning = True
        print(reasoning_text, end="", flush=True)

    if delta.content:
        # Print the separator once, and only if reasoning output came first.
        if seen_reasoning and not seen_answer:
            print("\n=============== Content =================", flush=True)
            seen_answer = True
        print(delta.content, end="", flush=True)

print()
```

#### 4.2.3 Tool Calling

Serve with `--tool-call-parser qwen3_coder` enabled, then send OpenAI-compatible tool requests:

```python Example theme={null}
from openai import OpenAI

# Point the OpenAI client at the locally served model.
client = OpenAI(base_url="http://localhost:30000/v1", api_key="EMPTY")

# JSON-schema description of the single argument accepted by get_weather.
weather_parameters = {
    "type": "object",
    "properties": {
        "location": {
            "type": "string",
            "description": "The city name",
        }
    },
    "required": ["location"],
}

get_weather_tool = {
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get the current weather for a location",
        "parameters": weather_parameters,
    },
}

completion = client.chat.completions.create(
    model="internLM/Intern-S2-Preview",
    messages=[{"role": "user", "content": "What is the weather in Beijing?"}],
    tools=[get_weather_tool],
    max_tokens=1024,
)

# Show the assistant message of the first choice.
print(completion.choices[0].message)
```