Inference
post
Body
environment — string | nullable (Optional)
model — string | nullable (Optional)
tags — string[] | nullable (Optional)
isDebug — boolean (Optional)
maxTokens — integer · int32 | nullable (Optional)
seed — integer · int32 (Optional)
temperature — number · float | nullable (Optional)
topP — number · double | nullable (Optional)
topK — integer · int32 | nullable (Optional)
Responses
200
OK
post
POST /api/v1/inference HTTP/1.1
Host:
Content-Type: application/json
Accept: */*
Content-Length: 170
{
"messages": [
{
"role": "client",
"text": "text"
}
],
"environment": "text",
"model": "text",
"tags": [
"text"
],
"isDebug": true,
"maxTokens": 1,
"seed": 1,
"temperature": 1,
"topP": 1,
"topK": 1
}
200
OK
{
"creditsUsed": 1,
"llmCostCredits": 1,
"creditsLeft": 1,
"milliseconds": 1,
"result": {
"model": "text",
"provider": "text",
"usage": {
"tokensIn": 1,
"tokensOut": 1,
"tokensTotal": 1,
"costUsd": 1,
"costCredits": 1
}
},
"pair": {
"timestamp": "2025-07-01T18:24:07.384Z",
"response": {
"id": "text",
"idCreated": "2025-07-01T18:24:07.384Z",
"inferenceRequestId": "text",
"model": "text",
"provider": "text",
"usage": {
"tokensIn": 1,
"tokensOut": 1,
"tokensTotal": 1,
"costUsd": 1,
"costCredits": 1
},
"textResponse": "text",
"verbatimResponse": "text",
"userId": "text",
"milliseconds": 1,
"tenantId": "text",
"shuttleRequestId": "text"
},
"request": {
"id": "text",
"idCreated": "2025-07-01T18:24:07.384Z",
"shuttleRequestId": "text",
"messages": [
{
"role": "client",
"text": "text"
}
],
"model": "text",
"maxTokens": 1,
"seed": 1,
"temperature": 1,
"topP": 1,
"topK": 1
},
"contextId": "text"
}
}