TomGrc committed · Commit 58556c3 · verified · 1 Parent(s): 9659d5d

Fix grammar and punctuation errors in README.md


- Fix comma splice: split into two sentences in Section 5 (Deployment)
- Remove extra space before colon
- Add missing periods at end of sentences
- Fix subject-verb agreement in code comments ("support" → "supports")
- Standardize the number of equal signs in print statements

Files changed (1)
  1. README.md +14 -14
README.md CHANGED
@@ -527,7 +527,7 @@ Kimi-K2.5 adopts the same native int4 quantization method as [Kimi-K2-Thinking](
 
 ## 5. Deployment
 > [!Note]
-> You can access Kimi-K2.5's API on https://platform.moonshot.ai , we provide OpenAI/Anthropic-compatible API for you. To verify the deployment is correct, we also provide the [Kimi Vendor Verifier](https://kimi.com/blog/kimi-vendor-verifier.html).
+> You can access Kimi-K2.5's API on https://platform.moonshot.ai and we provide OpenAI/Anthropic-compatible API for you. To verify the deployment is correct, we also provide the [Kimi Vendor Verifier](https://kimi.com/blog/kimi-vendor-verifier.html).
 Currently, Kimi-K2.5 is recommended to run on the following inference engines:
 * vLLM
 * SGLang
@@ -543,13 +543,13 @@ Deployment examples can be found in the [Model Deployment Guide](docs/deploy_gui
 
 The usage demos below demonstrate how to call our official API.
 
-For third-party API deployed with vLLM or SGLang, please note that :
+For third-party APIs deployed with vLLM or SGLang, please note that:
 > [!Note]
-> - Chat with video content is an experimental feature and is only supported in our official API for now
+> - Chat with video content is an experimental feature and is only supported in our official API for now.
 >
 > - The recommended `temperature` will be `1.0` for Thinking mode and `0.6` for Instant mode.
 >
-> - The recommended `top_p` is `0.95`
+> - The recommended `top_p` is `0.95`.
 >
 > - To use instant mode, you need to pass `{'chat_template_kwargs': {"thinking": False}}` in `extra_body`.
 
@@ -574,9 +574,9 @@ def simple_chat(client: openai.OpenAI, model_name: str):
     response = client.chat.completions.create(
         model=model_name, messages=messages, stream=False, max_tokens=4096
     )
-    print('===== Below is reasoning_content in Thinking Mode ======')
+    print('====== Below is reasoning_content in Thinking Mode ======')
     print(f'reasoning content: {response.choices[0].message.reasoning_content}')
-    print('===== Below is response in Thinking Mode ======')
+    print('====== Below is response in Thinking Mode ======')
     print(f'response: {response.choices[0].message.content}')
 
     # To use instant mode, pass {"thinking" = {"type":"disabled"}}
@@ -588,7 +588,7 @@ def simple_chat(client: openai.OpenAI, model_name: str):
         extra_body={'thinking': {'type': 'disabled'}}, # this is for official API
         # extra_body= {'chat_template_kwargs': {"thinking": False}} # this is for vLLM/SGLang
     )
-    print('===== Below is response in Instant Mode ======')
+    print('====== Below is response in Instant Mode ======')
     print(f'response: {response.choices[0].message.content}')
 ```
 
@@ -623,12 +623,12 @@ def chat_with_image(client: openai.OpenAI, model_name: str):
     response = client.chat.completions.create(
         model=model_name, messages=messages, stream=False, max_tokens=8192
     )
-    print('===== Below is reasoning_content in Thinking Mode ======')
+    print('====== Below is reasoning_content in Thinking Mode ======')
     print(f'reasoning content: {response.choices[0].message.reasoning_content}')
-    print('===== Below is response in Thinking Mode ======')
+    print('====== Below is response in Thinking Mode ======')
     print(f'response: {response.choices[0].message.content}')
 
-    # Also support instant mode if pass {"thinking" = {"type":"disabled"}}
+    # Also support instant mode if you pass {"thinking" = {"type":"disabled"}}
     response = client.chat.completions.create(
         model=model_name,
         messages=messages,
@@ -637,7 +637,7 @@ def chat_with_image(client: openai.OpenAI, model_name: str):
         extra_body={'thinking': {'type': 'disabled'}}, # this is for official API
         # extra_body= {'chat_template_kwargs': {"thinking": False}} # this is for vLLM/SGLang
     )
-    print('===== Below is response in Instant Mode ======')
+    print('====== Below is response in Instant Mode ======')
     print(f'response: {response.choices[0].message.content}')
 
     return response.choices[0].message.content
@@ -667,9 +667,9 @@ def chat_with_video(client: openai.OpenAI, model_name:str):
     ]
 
     response = client.chat.completions.create(model=model_name, messages=messages)
-    print('===== Below is reasoning_content in Thinking Mode ======')
+    print('====== Below is reasoning_content in Thinking Mode ======')
     print(f'reasoning content: {response.choices[0].message.reasoning_content}')
-    print('===== Below is response in Thinking Mode ======')
+    print('====== Below is response in Thinking Mode ======')
     print(f'response: {response.choices[0].message.content}')
 
     # Also support instant mode if pass {"thinking" = {"type":"disabled"}}
@@ -681,7 +681,7 @@ def chat_with_video(client: openai.OpenAI, model_name:str):
         extra_body={'thinking': {'type': 'disabled'}}, # this is for official API
         # extra_body= {'chat_template_kwargs': {"thinking": False}} # this is for vLLM/SGLang
     )
-    print('===== Below is response in Instant Mode ======')
+    print('====== Below is response in Instant Mode ======')
     print(f'response: {response.choices[0].message.content}')
     return response.choices[0].message.content
 ```
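The README's notes recommend different sampling settings per mode (`temperature` 1.0 for Thinking, 0.6 for Instant, `top_p` 0.95) and two different `extra_body` shapes depending on whether you call the official API or a vLLM/SGLang deployment. As a minimal sketch of how those pieces combine into one request, assuming a hypothetical helper name (`build_request_kwargs` is not part of the README; the `extra_body` shapes and sampling values come from the notes above):

```python
def build_request_kwargs(model_name: str, thinking: bool, official_api: bool) -> dict:
    """Assemble kwargs for client.chat.completions.create() per the README's notes."""
    kwargs = {
        "model": model_name,
        # Recommended sampling: temperature 1.0 (Thinking) / 0.6 (Instant), top_p 0.95.
        "temperature": 1.0 if thinking else 0.6,
        "top_p": 0.95,
    }
    if not thinking:
        if official_api:
            # Official Moonshot API: disable thinking via the 'thinking' field.
            kwargs["extra_body"] = {"thinking": {"type": "disabled"}}
        else:
            # vLLM/SGLang deployments: disable thinking via chat_template_kwargs.
            kwargs["extra_body"] = {"chat_template_kwargs": {"thinking": False}}
    return kwargs


if __name__ == "__main__":
    # Instant mode against a vLLM/SGLang deployment:
    print(build_request_kwargs("kimi-k2.5", thinking=False, official_api=False))
```

An `openai.OpenAI(base_url=..., api_key=...)` client would then receive these kwargs plus `messages`, as in the `simple_chat` demo above.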