> ## Documentation Index
> Fetch the complete documentation index at: https://docs.sglang.io/llms.txt
> Use this file to discover all available pages before exploring further.

# Kimi-K2.7-Code

> Deploy Kimi-K2.7-Code with SGLang for coding-focused agentic workflows, thinking output, tool calling, and multimodal input.

export const KimiK27CodeDeployment = () => {
  const options = {
    hardware: {
      name: 'hardware',
      title: 'Hardware Platform',
      items: [{
        id: 'h200',
        label: 'H200',
        default: true
      }, {
        id: 'b300',
        label: 'B300',
        default: false
      }, {
        id: 'gb300',
        label: 'GB300',
        default: false
      }, {
        id: 'mi300x',
        label: 'MI300X',
        default: false
      }, {
        id: 'mi325x',
        label: 'MI325X',
        default: false
      }, {
        id: 'mi350x',
        label: 'MI350X',
        default: false
      }, {
        id: 'mi355x',
        label: 'MI355X',
        default: false
      }]
    },
    reasoning: {
      name: 'reasoning',
      title: 'Reasoning Parser',
      items: [{
        id: 'disabled',
        label: 'Disabled',
        default: false
      }, {
        id: 'enabled',
        label: 'Enabled',
        default: true
      }]
    },
    toolcall: {
      name: 'toolcall',
      title: 'Tool Call Parser',
      items: [{
        id: 'disabled',
        label: 'Disabled',
        default: false
      }, {
        id: 'enabled',
        label: 'Enabled',
        default: true
      }]
    },
    dpattention: {
      name: 'dpattention',
      title: 'DP Attention',
      items: [{
        id: 'disabled',
        label: 'Disabled',
        subtitle: 'Low Latency',
        default: true
      }, {
        id: 'enabled',
        label: 'Enabled',
        subtitle: 'High Throughput',
        default: false
      }]
    }
  };
  const modelConfigs = {
    h200: {
      tp: 8
    },
    b300: {
      tp: 8
    },
    gb300: {
      tp: 4
    },
    mi300x: {
      tp: 4
    },
    mi325x: {
      tp: 4
    },
    mi350x: {
      tp: 4
    },
    mi355x: {
      tp: 4
    }
  };
  const resolveItems = (option, values) => typeof option.getDynamicItems === 'function' ? option.getDynamicItems(values) : option.items || [];
  const getInitialState = () => {
    const initialState = {};
    for (const [key, option] of Object.entries(options)) {
      const items = resolveItems(option, initialState);
      const def = items.find(item => item.default && !item.disabled) || items.find(item => !item.disabled) || items[0];
      initialState[key] = def.id;
    }
    return initialState;
  };
  const [values, setValues] = useState(getInitialState);
  const [isDark, setIsDark] = useState(false);
  useEffect(() => {
    const checkDarkMode = () => {
      const html = document.documentElement;
      const isDarkMode = html.classList.contains('dark') || html.getAttribute('data-theme') === 'dark' || html.style.colorScheme === 'dark';
      setIsDark(isDarkMode);
    };
    checkDarkMode();
    const observer = new MutationObserver(checkDarkMode);
    observer.observe(document.documentElement, {
      attributes: true,
      attributeFilter: ['class', 'data-theme', 'style']
    });
    return () => observer.disconnect();
  }, []);
  useEffect(() => {
    setValues(prev => {
      const next = {
        ...prev
      };
      for (const [key, option] of Object.entries(options)) {
        if (typeof option.condition === 'function' && !option.condition(next)) {
          const items = resolveItems(option, next);
          const fallback = items.find(item => item.default && !item.disabled) || items.find(item => !item.disabled);
          if (fallback) next[key] = fallback.id;
          continue;
        }
        if (typeof option.getDynamicItems !== 'function') continue;
        const items = option.getDynamicItems(next);
        const current = items.find(item => item.id === next[key]);
        if (!current || current.disabled) {
          const fallback = items.find(item => item.default && !item.disabled) || items.find(item => !item.disabled);
          if (fallback) next[key] = fallback.id;
        }
      }
      return next;
    });
  }, [values.hardware]);
  const handleRadioChange = (optionName, value) => {
    setValues(prev => ({
      ...prev,
      [optionName]: value
    }));
  };
  const generateCommand = () => {
    const {hardware, reasoning, toolcall, dpattention} = values;
    const isAMD = hardware === 'mi300x' || hardware === 'mi325x' || hardware === 'mi350x' || hardware === 'mi355x';
    const hwConfig = modelConfigs[hardware];
    const tpValue = hwConfig.tp;
    const modelName = 'moonshotai/Kimi-K2.7-Code';
    let cmd = '';
    if (isAMD) {
      cmd += 'SGLANG_USE_AITER=1 SGLANG_ROCM_FUSED_DECODE_MLA=0 \\\n';
    }
    cmd += 'sglang serve \\\n';
    cmd += `  --model-path ${modelName}`;
    cmd += ` \\\n  --tp ${tpValue}`;
    if (isAMD) {
      cmd += ' \\\n  --mem-fraction-static 0.8';
    }
    cmd += ' \\\n  --trust-remote-code';
    if (dpattention === 'enabled') {
      cmd += ` \\\n  --dp ${tpValue} \\\n  --enable-dp-attention`;
    }
    if (reasoning === 'enabled') {
      cmd += ' \\\n  --reasoning-parser kimi_k2';
    }
    if (toolcall === 'enabled') {
      cmd += ' \\\n  --tool-call-parser kimi_k2';
    }
    if (hardware === 'b300' || hardware === 'gb300') {
      cmd += ' \\\n  --attention-backend tokenspeed_mla';
    }
    if (isAMD) {
      cmd += ' \\\n  --kv-cache-dtype fp8_e4m3';
    }
    cmd += ' \\\n  --host 0.0.0.0 \\\n  --port 30000';
    return cmd;
  };
  const containerStyle = {
    maxWidth: '900px',
    margin: '0 auto',
    display: 'flex',
    flexDirection: 'column',
    gap: '4px'
  };
  const cardStyle = {
    padding: '8px 12px',
    border: `1px solid ${isDark ? '#374151' : '#e5e7eb'}`,
    borderLeft: `3px solid ${isDark ? '#E85D4D' : '#D45D44'}`,
    borderRadius: '4px',
    display: 'flex',
    alignItems: 'center',
    gap: '12px',
    background: isDark ? '#1f2937' : '#fff'
  };
  const titleStyle = {
    fontSize: '13px',
    fontWeight: '600',
    minWidth: '140px',
    flexShrink: 0,
    color: isDark ? '#e5e7eb' : 'inherit'
  };
  const itemsStyle = {
    display: 'flex',
    rowGap: '2px',
    columnGap: '6px',
    flexWrap: 'wrap',
    alignItems: 'center',
    flex: 1
  };
  const labelBaseStyle = {
    padding: '4px 10px',
    border: `1px solid ${isDark ? '#9ca3af' : '#d1d5db'}`,
    borderRadius: '3px',
    cursor: 'pointer',
    display: 'inline-flex',
    flexDirection: 'column',
    alignItems: 'center',
    justifyContent: 'center',
    fontWeight: '500',
    fontSize: '13px',
    transition: 'all 0.2s',
    userSelect: 'none',
    minWidth: '45px',
    textAlign: 'center',
    flex: 1,
    background: isDark ? '#374151' : '#fff',
    color: isDark ? '#e5e7eb' : 'inherit'
  };
  const checkedStyle = {
    background: '#D45D44',
    color: 'white',
    borderColor: '#D45D44'
  };
  const disabledStyle = {
    cursor: 'not-allowed',
    opacity: 0.4
  };
  const subtitleStyle = {
    display: 'block',
    fontSize: '9px',
    marginTop: '1px',
    lineHeight: '1.1',
    opacity: 0.7
  };
  const commandDisplayStyle = {
    flex: 1,
    padding: '12px 16px',
    background: isDark ? '#111827' : '#f5f5f5',
    borderRadius: '6px',
    fontFamily: "'Menlo', 'Monaco', 'Courier New', monospace",
    fontSize: '12px',
    lineHeight: '1.5',
    color: isDark ? '#e5e7eb' : '#374151',
    whiteSpace: 'pre-wrap',
    overflowX: 'auto',
    margin: 0,
    border: `1px solid ${isDark ? '#374151' : '#e5e7eb'}`
  };
  return <div style={containerStyle} className="not-prose">
      {Object.entries(options).map(([key, option]) => {
    if (typeof option.condition === 'function' && !option.condition(values)) return null;
    const items = resolveItems(option, values);
    return <div key={key} style={cardStyle}>
            <div style={titleStyle}>{option.title}</div>
            <div style={itemsStyle}>
              {items.map(item => {
      const isChecked = values[option.name] === item.id;
      const isDisabled = !!item.disabled;
      return <label key={item.id} style={{
        ...labelBaseStyle,
        ...isChecked ? checkedStyle : {},
        ...isDisabled ? disabledStyle : {}
      }} title={item.disabledReason || ''}>
                    <input type="radio" name={option.name} value={item.id} checked={isChecked} disabled={isDisabled} onChange={() => !isDisabled && handleRadioChange(option.name, item.id)} style={{
        display: 'none'
      }} />
                    {item.label}
                    {item.subtitle && <small style={{
        ...subtitleStyle,
        color: isChecked ? 'rgba(255,255,255,0.85)' : 'inherit'
      }}>{item.subtitle}</small>}
                  </label>;
    })}
            </div>
          </div>;
  })}
      <div style={cardStyle}>
        <div style={titleStyle}>Run this Command:</div>
        <pre style={commandDisplayStyle}>{generateCommand()}</pre>
      </div>
    </div>;
};

## 1. Model Introduction

[Kimi-K2.7-Code](https://huggingface.co/moonshotai/Kimi-K2.7-Code) is a coding-focused agentic model by Moonshot AI, built on top of Kimi-K2.6. It improves real-world long-horizon coding task completion while reducing thinking-token usage by approximately 30% compared with Kimi-K2.6.

**Key Features:**

* **Coding-Focused Agentic Model**: Optimized for end-to-end coding workflows and complex software engineering tasks.
* **Token Efficiency**: Reduces thinking-token usage by approximately 30% versus Kimi-K2.6.
* **K2.6-Compatible Deployment**: Shares the same architecture as Kimi-K2.5/Kimi-K2.6, so the SGLang deployment method can be reused with the new model ID.
* **Native Multimodality**: Shares Kimi-K2.6's native multimodal architecture with a MoonViT vision encoder (400M parameters) and supports image and video (experimental) input.

**Benchmarks:**

<table>
  <thead>
    <tr>
      <th>Benchmark</th>
      <th>Kimi-K2.6</th>
      <th>Kimi-K2.7-Code</th>
    </tr>
  </thead>

  <tbody>
    <tr>
      <td>Kimi Code Bench v2</td>
      <td>50.9</td>
      <td>62.0</td>
    </tr>

    <tr>
      <td>Program Bench</td>
      <td>48.3</td>
      <td>53.6</td>
    </tr>

    <tr>
      <td>MLS Bench Lite</td>
      <td>26.7</td>
      <td>35.1</td>
    </tr>

    <tr>
      <td>Kimi Claw 24/7 Bench</td>
      <td>42.9</td>
      <td>46.9</td>
    </tr>

    <tr>
      <td>MCP Atlas</td>
      <td>69.4</td>
      <td>76.0</td>
    </tr>

    <tr>
      <td>MCP Mark Verified</td>
      <td>72.8</td>
      <td>81.1</td>
    </tr>
  </tbody>
</table>

**Recommended Generation Parameters:**

* Thinking Mode: `temperature=1.0`, `top_p=0.95`
* Kimi-K2.7-Code forces thinking and preserve-thinking behavior; instant mode is not supported.

**Available Models:**

* **INT4 (native checkpoint)**: [moonshotai/Kimi-K2.7-Code](https://huggingface.co/moonshotai/Kimi-K2.7-Code)

**License:** Modified MIT for the native checkpoint.

For details, see the [official model card](https://huggingface.co/moonshotai/Kimi-K2.7-Code).

## 2. SGLang Installation

Refer to the [official SGLang installation guide](/docs/get-started/install).

## 3. Model Deployment

### 3.1 Basic Configuration

**Interactive Command Generator**: Use the configuration selector below to automatically generate the appropriate deployment command for your hardware platform, deployment strategy, and capabilities.

<KimiK27CodeDeployment />

### 3.2 Configuration Tips

* **Memory**: Requires GPUs with ≥140GB each. The native INT4 checkpoint supports H200 (8×, TP=8), B300 (8×, TP=8), GB300 (4×, TP=4), MI300X/MI325X (4×, TP=4), and MI350X/MI355X (4×, TP=4). Use `--context-length 128000` to conserve memory.
* **Context Length**: The model supports a 256K context length. Use a shorter `--context-length` when you need to reserve memory for larger batches.
* **Transformers Version**: The model card requires `transformers>=4.57.1,<5.0.0`.
* **AMD GPU TP Constraint**: On AMD GPUs, TP must be ≤ 4 (not 8). Kimi-K2.7-Code has 64 attention heads; the AITER MLA kernel requires `heads_per_gpu % 16 == 0`. With TP=4, each GPU gets 16 heads (valid). With TP=8, each GPU gets 8 heads (invalid).
* **AMD Docker Image**: Use `lmsysorg/sglang:v0.5.9-rocm700-mi35x` for MI350X/MI355X and `lmsysorg/sglang:v0.5.9-rocm700-mi30x` for MI300X/MI325X.
* **DP Attention**: Enable with `--dp <N> --enable-dp-attention` for production throughput. A common choice is to set `--dp` equal to `--tp`, but this is not required.
* **Reasoning Parser**: Add `--reasoning-parser kimi_k2` to separate thinking and content in model outputs.
* **Tool Call Parser**: Add `--tool-call-parser kimi_k2` for structured tool calls.
* **AMD FP8 KV Cache**: On AMD platforms the generator adds `--kv-cache-dtype fp8_e4m3` by default and sets `--mem-fraction-static 0.8` to fit the INT4 weights plus KV cache. FP8 KV cache trades a small amount of accuracy for memory; omit the flag if you observe accuracy regressions on your workload.

## 4. Model Invocation

### 4.1 Basic Usage

See [Basic API Usage](/docs/basic_usage/send_request).

### 4.2 Advanced Usage

#### 4.2.1 Multimodal (Vision + Text) Input

Kimi-K2.7-Code supports native multimodal input with images:

```python Example theme={null}
from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:30000/v1",
    api_key="EMPTY"
)

response = client.chat.completions.create(
    model="moonshotai/Kimi-K2.7-Code",
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "image_url",
                    "image_url": {
                        "url": "https://ofasys-multimodal-wlcb-3-toshanghai.oss-accelerate.aliyuncs.com/wpf272043/keepme/image/receipt.png"
                    }
                },
                {
                    "type": "text",
                    "text": "What is in this image? Describe it in detail."
                }
            ]
        }
    ]
)

print(response.choices[0].message.content)
```

**Output Example:**

```text Output theme={null}
This image shows a **paper receipt from Auntie Anne's**, the pretzel chain restaurant. Here's a detailed breakdown:

## Header
- At the top left is the Auntie Anne's logo (a pretzel with a halo)
- The store name "**Auntie Anne's**" is printed prominently at the top
- Some text below the store name appears blurred/redacted (likely store location, address, or transaction details)

## Purchase Details
- **Item**: CINNAMON SUGAR
- **Quantity & Price**: 1 × 17,000
- **Item Total**: 17,000

## Financial Summary
- **SUB TOTAL**: 17,000
- **GRAND TOTAL**: 17,000
- **CASH IDR**: 20,000 (customer paid 20,000 Indonesian Rupiah)
- **CHANGE DUE**: 3,000

## Physical Description
- The receipt is printed on white thermal paper
- Some information in the middle section and toward the bottom is intentionally blurred/obscured
- The paper appears slightly curved/wrinkled and is placed on a dark brown surface (likely a table or counter)

The transaction is in **Indonesian Rupiah (IDR)**, indicating this purchase was made at an Auntie Anne's location in Indonesia. The customer bought one Cinnamon Sugar pretzel for 17,000 IDR and received 3,000 IDR in change after paying with 20,000 IDR cash.
```

#### 4.2.2 Reasoning Output

Kimi-K2.7-Code forces thinking mode and preserve-thinking behavior.

**Thinking Mode (always on)** — when the server is launched with `--reasoning-parser kimi_k2`, reasoning content is returned separately in `reasoning_content`:

```python Example theme={null}
from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:30000/v1",
    api_key="EMPTY"
)

response = client.chat.completions.create(
    model="moonshotai/Kimi-K2.7-Code",
    messages=[
        {"role": "user", "content": "Which one is bigger, 9.11 or 9.9? Think carefully."}
    ]
)

print("====== Reasoning Content (Thinking Mode) ======")
print(response.choices[0].message.reasoning_content)
print("====== Response (Thinking Mode) ======")
print(response.choices[0].message.content)
```

**Output Example:**

```text Output theme={null}
====== Reasoning Content (Thinking Mode) ======
The user is asking which number is bigger: 9.11 or 9.9. This seems straightforward, but there's a viral internet debate about this due to decimal confusion.

Let me think carefully:
- 9.11 means 9 + 11/100 = 9.11
- 9.9 means 9 + 9/10 = 9.90

So 9.9 = 9.90, and 9.90 > 9.11 because 0.90 > 0.11.

The confusion often comes from people thinking of software versioning (where 9.11 comes after 9.9) or comparing the numbers after the decimal as whole numbers (11 vs 9, thinking 11 > 9).

So mathematically, 9.9 is clearly bigger. 9.9 - 9.11 = 0.79.

I should explain this clearly and address the common misconception.
====== Response (Thinking Mode) ======
Mathematically, **9.9 is bigger**.

Here's why:

**9.9 = 9.90**

When comparing decimals, you need to look at the same place values:
- 9.11 = 9 ones, 1 tenth, and 1 hundredth
- 9.9 = 9 ones, 9 tenths, and 0 hundredths (9.90)

Since **0.90 > 0.11**, it follows that **9.9 > 9.11**.

The difference is:
9.9 - 9.11 = 0.79

**Why people get confused:** Many mistakenly treat the decimals like whole numbers (thinking "11 is bigger than 9") or confuse this with software version numbering (where version 9.11 comes after version 9.9). But in standard mathematics, 9.9 is definitively larger.
```

#### 4.2.3 Preserve Thinking

Kimi-K2.7-Code keeps reasoning content across multi-turn interactions. This behavior is enabled by default and cannot be disabled.

```python Example theme={null}
from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:30000/v1",
    api_key="EMPTY"
)

messages = [
    {
        "role": "user",
        "content": "Tell me three random numbers."
    },
    {
        "role": "assistant",
        "reasoning_content": "I'll start by listing five numbers: 473, 921, 235, 215, 222, and I'll tell you the first three.",
        "content": "473, 921, 235"
    },
    {
        "role": "user",
        "content": "What are the other two numbers you have in mind?"
    }
]

response = client.chat.completions.create(
    model="moonshotai/Kimi-K2.7-Code",
    messages=messages,
    stream=False,
    max_tokens=4096,
)

print(response.choices[0].message.content)
```

Some OpenAI-compatible deployments use `reasoning` instead of `reasoning_content` in assistant messages. Use the field your serving stack exposes.

#### 4.2.4 Tool Calling

Kimi-K2.7-Code supports tool calling capabilities for agentic tasks:

```python Example theme={null}
from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:30000/v1",
    api_key="EMPTY"
)

# Define available tools
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather for a location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city name"
                    },
                    "unit": {
                        "type": "string",
                        "enum": ["celsius", "fahrenheit"],
                        "description": "Temperature unit"
                    }
                },
                "required": ["location"]
            }
        }
    }
]

response = client.chat.completions.create(
    model="moonshotai/Kimi-K2.7-Code",
    messages=[
        {"role": "user", "content": "What's the weather in Beijing?"}
    ],
    tools=tools,
    stream=True
)

# Process streaming response
tool_calls_accumulator = {}

for chunk in response:
    if chunk.choices and len(chunk.choices) > 0:
        delta = chunk.choices[0].delta

        if hasattr(delta, 'tool_calls') and delta.tool_calls:
            for tool_call in delta.tool_calls:
                index = tool_call.index
                if index not in tool_calls_accumulator:
                    tool_calls_accumulator[index] = {'name': None, 'arguments': ''}
                if tool_call.function:
                    if tool_call.function.name:
                        tool_calls_accumulator[index]['name'] = tool_call.function.name
                    if tool_call.function.arguments:
                        tool_calls_accumulator[index]['arguments'] += tool_call.function.arguments

        if delta.content:
            print(delta.content, end="", flush=True)

for index, tool_call in sorted(tool_calls_accumulator.items()):
    print(f"Tool Call: {tool_call['name']}")
    print(f"  Arguments: {tool_call['arguments']}")
```

**Output Example:**

```text Output theme={null}
Tool Call: get_weather
  Arguments: {"location": "Beijing"}
```

**Handling Tool Call Results:**

```python Example theme={null}
# Send tool result back to the model
messages = [
    {"role": "user", "content": "What's the weather in Beijing?"},
    {
        "role": "assistant",
        "content": None,
        "tool_calls": [{
            "id": "call_123",
            "type": "function",
            "function": {
                "name": "get_weather",
                "arguments": '{"location": "Beijing", "unit": "celsius"}'
            }
        }]
    },
    {
        "role": "tool",
        "tool_call_id": "call_123",
        "content": "The weather in Beijing is 22°C and sunny."
    }
]

final_response = client.chat.completions.create(
    model="moonshotai/Kimi-K2.7-Code",
    messages=messages
)

print(final_response.choices[0].message.content)
```

**Output Example:**

```text Output theme={null}
The weather in Beijing is currently **22°C and sunny**. ☀️

It's a nice, warm day there—great for being outdoors!
```

#### 4.2.5 Multimodal + Tool Calling (Agentic Vision)

Combine vision understanding with tool calling for advanced agentic tasks:

```python Example theme={null}
from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:30000/v1",
    api_key="EMPTY"
)

tools = [
    {
        "type": "function",
        "function": {
            "name": "search_product",
            "description": "Search for a product by name or description",
            "parameters": {
                "type": "object",
                "properties": {
                    "query": {
                        "type": "string",
                        "description": "The product name or description to search for"
                    }
                },
                "required": ["query"]
            }
        }
    }
]

response = client.chat.completions.create(
    model="moonshotai/Kimi-K2.7-Code",
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "image_url",
                    "image_url": {
                        "url": "https://ofasys-multimodal-wlcb-3-toshanghai.oss-accelerate.aliyuncs.com/wpf272043/keepme/image/receipt.png"
                    }
                },
                {
                    "type": "text",
                    "text": "Can you identify this product and search for similar items?"
                }
            ]
        }
    ],
    tools=tools
)

msg = response.choices[0].message

# Print reasoning process
if msg.reasoning_content:
    print("=== Reasoning ===")
    print(msg.reasoning_content)

# Print response content
if msg.content:
    print("=== Content ===")
    print(msg.content)

# Print tool calls
if msg.tool_calls:
    print("=== Tool Calls ===")
    for tc in msg.tool_calls:
        print(f"  Function: {tc.function.name}")
        print(f"  Arguments: {tc.function.arguments}")
```

**Output Example:**

```text Output theme={null}
=== Reasoning ===
The user wants me to identify the product from the receipt and search for similar items. Looking at the receipt, it's from Auntie Anne's and the item purchased is "CINNAMON SUGAR" for 17,000 IDR. This is likely a Cinnamon Sugar Pretzel from Auntie Anne's, which is a popular pretzel chain.

I should search for this product using the search_product function. The query should be something like "Auntie Anne's Cinnamon Sugar Pretzel" or just "Cinnamon Sugar Pretzel" to find similar items.
=== Content ===
Based on the receipt, the product is a **Cinnamon Sugar Pretzel** from **Auntie Anne's** (a popular pretzel bakery chain). The receipt shows it was purchased for 17,000 Indonesian Rupiah (IDR).

Let me search for this product and similar items for you.
=== Tool Calls ===
  Function: search_product
  Arguments: {"query":"Auntie Anne's Cinnamon Sugar Pretzel"}
```

#### 4.2.6 Deployment Command Example

Deploy Kimi-K2.7-Code with the following command (H200/B300, reasoning and tool parsing enabled):

```shell Command theme={null}
sglang serve \
  --model-path moonshotai/Kimi-K2.7-Code \
  --tp 8 \
  --reasoning-parser kimi_k2 \
  --tool-call-parser kimi_k2 \
  --trust-remote-code \
  --host 0.0.0.0 \
  --port 30000
```

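For GB300, use `--tp 4`. On B300 and GB300, the command generator in section 3.1 additionally adds `--attention-backend tokenspeed_mla`.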

## 5. Benchmark

The following results are from the official Kimi-K2.7-Code model card. They were evaluated with thinking mode enabled through Kimi Code CLI at `temperature=1.0`, `top_p=0.95`, and a 262,144-token context length unless otherwise stated.

<table>
  <thead>
    <tr>
      <th>Category</th>
      <th>Benchmark</th>
      <th>Kimi-K2.6</th>
      <th>Kimi-K2.7-Code</th>
    </tr>
  </thead>

  <tbody>
    <tr>
      <td>Coding</td>
      <td>Kimi Code Bench v2</td>
      <td>50.9</td>
      <td>62.0</td>
    </tr>

    <tr>
      <td>Coding</td>
      <td>Program Bench</td>
      <td>48.3</td>
      <td>53.6</td>
    </tr>

    <tr>
      <td>Coding</td>
      <td>MLS Bench Lite</td>
      <td>26.7</td>
      <td>35.1</td>
    </tr>

    <tr>
      <td>Agentic</td>
      <td>Kimi Claw 24/7 Bench</td>
      <td>42.9</td>
      <td>46.9</td>
    </tr>

    <tr>
      <td>Agentic</td>
      <td>MCP Atlas</td>
      <td>69.4</td>
      <td>76.0</td>
    </tr>

    <tr>
      <td>Agentic</td>
      <td>MCP Mark Verified</td>
      <td>72.8</td>
      <td>81.1</td>
    </tr>
  </tbody>
</table>