Hello everyone,
I have a question. When I write my own code to load a model's response in OpenAI's format, I encounter an error. I have tested the same request with OpenAI's official library, and it worked successfully.
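For context, this is roughly the call that works through the official SDK. It's a minimal sketch: the base_url, api_key, and prompt are placeholders for my local setup, not the exact values I use.

from openai import OpenAI

# Placeholder endpoint for my local OpenAI-compatible server.
client = OpenAI(base_url="http://localhost:1234/v1", api_key="not-needed")

response = client.chat.completions.create(
    model="DeepSeek-R1-Distill-Qwen-7B-Q4_K_M",
    messages=[{"role": "user", "content": "What is 1 + 1?"}],
)
print(response.choices[0].message.content)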
Here is the JSON response the server returns:
{
  "id": "chatcmpl-30101561cf",
  "object": "chat.completion",
  "created": 1742132277,
  "model": "DeepSeek-R1-Distill-Qwen-7B-Q4_K_M",
  "choices": [
    {
      "index": 0,
      "logprobs": null,
      "finish_reason": "stop",
      "message": {
        "role": "assistant",
        "content": "</think>\n\n1 + 1 equals **2**."
      }
    }
  ],
  "usage": {
    "prompt_tokens": 10,
    "completion_tokens": 6,
    "total_tokens": 16
  },
  "stats": {
    "tokens_per_second": 3.394546566015665,
    "time_to_first_token": 0.111,
    "generation_time": 1.7675409317016602,
    "stop_reason": "eosFound"
  },
  "model_info": {
    "arch": "deepseek",
    "quant": "Q4_K_M",
    "format": "gguf",
    "context_length": 2048
  },
  "runtime": {
    "name": "llama.cpp",
    "version": "1.3.0",
    "supported_formats": [
      "gguf"
    ]
  }
}
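And this is roughly what my own loading code does, as a minimal sketch (again with a placeholder URL; I'm reading the raw JSON directly, so the non-standard stats/model_info/runtime fields come through as well):

import requests

# Placeholder URL for the same local endpoint.
resp = requests.post(
    "http://localhost:1234/v1/chat/completions",
    json={
        "model": "DeepSeek-R1-Distill-Qwen-7B-Q4_K_M",
        "messages": [{"role": "user", "content": "What is 1 + 1?"}],
    },
)
data = resp.json()

# Standard OpenAI fields.
print(data["choices"][0]["message"]["content"])

# Extra fields present in this response that are not part of the
# standard chat.completion schema.
print(data.get("stats"), data.get("model_info"), data.get("runtime"))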
Can anyone help me? Thank you!