vllm.entrypoints.openai.responses.protocol ¶
ResponseInputOutputItem module-attribute ¶
ResponseInputOutputItem: TypeAlias = (
ResponseInputItemParam | ResponseOutputItem
)
ResponseInputOutputMessage module-attribute ¶
ResponseInputOutputMessage: TypeAlias = (
list[ChatCompletionMessageParam]
| list[ResponseRawMessageAndToken]
)
StreamingResponsesResponse module-attribute ¶
StreamingResponsesResponse: TypeAlias = (
ResponseCreatedEvent
| ResponseInProgressEvent
| ResponseCompletedEvent
| ResponseOutputItemAddedEvent
| ResponseOutputItemDoneEvent
| ResponseContentPartAddedEvent
| ResponseContentPartDoneEvent
| ResponseReasoningTextDeltaEvent
| ResponseReasoningTextDoneEvent
| ResponseReasoningPartAddedEvent
| ResponseReasoningPartDoneEvent
| ResponseCodeInterpreterCallInProgressEvent
| ResponseCodeInterpreterCallCodeDeltaEvent
| ResponseWebSearchCallInProgressEvent
| ResponseWebSearchCallSearchingEvent
| ResponseWebSearchCallCompletedEvent
| ResponseCodeInterpreterCallCodeDoneEvent
| ResponseCodeInterpreterCallInterpretingEvent
| ResponseCodeInterpreterCallCompletedEvent
| ResponseMcpCallArgumentsDeltaEvent
| ResponseMcpCallArgumentsDoneEvent
| ResponseMcpCallInProgressEvent
| ResponseMcpCallCompletedEvent
)
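A consumer of the stream typically dispatches on the concrete event class. A minimal sketch, assuming event_stream is a hypothetical iterable of already-parsed events (the response attribute comes from the upstream OpenAI event models these classes extend):
# Minimal sketch: dispatching on StreamingResponsesResponse members.
# `event_stream` is a hypothetical iterable of already-parsed events.
from vllm.entrypoints.openai.responses.protocol import (
    ResponseCompletedEvent,
    ResponseCreatedEvent,
)

def consume(event_stream) -> None:
    for event in event_stream:
        if isinstance(event, ResponseCreatedEvent):
            print("created:", event.response.id)
        elif isinstance(event, ResponseCompletedEvent):
            print("completed:", event.response.status)
        else:
            # Delta/progress events carry type-specific payloads
            # (e.g. text or code deltas); handle them as needed.
            pass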
InputTokensDetails ¶
Bases: OpenAIBaseModel
Source code in vllm/entrypoints/openai/responses/protocol.py
cached_tokens_per_turn class-attribute instance-attribute ¶
OutputTokensDetails ¶
Bases: OpenAIBaseModel
Source code in vllm/entrypoints/openai/responses/protocol.py
output_tokens_per_turn class-attribute instance-attribute ¶
ResponseCompletedEvent ¶
Bases: ResponseCompletedEvent
Source code in vllm/entrypoints/openai/responses/protocol.py
ResponseCreatedEvent ¶
Bases: ResponseCreatedEvent
Source code in vllm/entrypoints/openai/responses/protocol.py
ResponseInProgressEvent ¶
Bases: ResponseInProgressEvent
Source code in vllm/entrypoints/openai/responses/protocol.py
ResponseRawMessageAndToken ¶
Bases: OpenAIBaseModel
Container for the raw message. If the message and tokens diverge, the tokens are the source of truth.
Source code in vllm/entrypoints/openai/responses/protocol.py
ResponseReasoningPartAddedEvent ¶
Bases: OpenAIBaseModel
Source code in vllm/entrypoints/openai/responses/protocol.py
ResponseReasoningPartDoneEvent ¶
Bases: OpenAIBaseModel
Source code in vllm/entrypoints/openai/responses/protocol.py
ResponseUsage ¶
ResponsesRequest ¶
Bases: OpenAIBaseModel
Source code in vllm/entrypoints/openai/responses/protocol.py
_DEFAULT_SAMPLING_PARAMS class-attribute instance-attribute ¶
cache_salt class-attribute instance-attribute ¶
cache_salt: str | None = Field(
default=None,
description="If specified, the prefix cache will be salted with the provided string to prevent an attacker to guess prompts in multi-user environments. The salt should be random, protected from access by 3rd parties, and long enough to be unpredictable (e.g., 43 characters base64-encoded, corresponding to 256 bit).",
)
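An illustrative way to produce a salt of the recommended strength (not part of vLLM; secrets.token_urlsafe(32) yields 43 URL-safe base64 characters, i.e. 256 bits of entropy):
# Illustrative only: generate a per-tenant cache salt of the
# recommended strength and pass it as the request's cache_salt field.
import secrets

cache_salt = secrets.token_urlsafe(32)  # 43 URL-safe base64 characters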
enable_response_messages class-attribute instance-attribute ¶
enable_response_messages: bool = Field(
default=False,
description="Dictates whether or not to return messages as part of the response object. Currently only supported fornon-background and gpt-oss only. ",
)
include class-attribute instance-attribute ¶
include: (
list[
Literal[
"code_interpreter_call.outputs",
"computer_call_output.output.image_url",
"file_search_call.results",
"message.input_image.image_url",
"message.output_text.logprobs",
"reasoning.encrypted_content",
],
]
| None
) = None
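For example, a request that opts into output-text logprobs would list the corresponding literal (the model name below is a placeholder):
# Hypothetical request payload; `include` uses one of the literals above.
payload = {
    "model": "my-model",  # placeholder
    "input": "Hello!",
    "include": ["message.output_text.logprobs"],
}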
include_stop_str_in_output class-attribute instance-attribute ¶
include_stop_str_in_output: bool = False
mm_processor_kwargs class-attribute instance-attribute ¶
mm_processor_kwargs: dict[str, Any] | None = Field(
default=None,
description="Additional kwargs to pass to the HF processor.",
)
previous_input_messages class-attribute instance-attribute ¶
priority class-attribute instance-attribute ¶
priority: int = Field(
default=0,
description="The priority of the request (lower means earlier handling; default: 0). Any priority other than 0 will raise an error if the served model does not use priority scheduling.",
)
prompt_cache_key class-attribute instance-attribute ¶
prompt_cache_key: str | None = Field(
default=None,
description="A key that was used to read from or write to the prompt cache.Note: This field has not been implemented yet and vLLM will ignore it.",
)
request_id class-attribute instance-attribute ¶
request_id: str = Field(
default_factory=lambda: f"resp_{random_uuid()}",
description="The request_id related to this request. If the caller does not set it, a random_uuid will be generated. This id is used through out the inference process and return in response.",
)
service_tier class-attribute instance-attribute ¶
service_tier: Literal[
"auto", "default", "flex", "scale", "priority"
] = "auto"
truncation class-attribute instance-attribute ¶
truncation: Literal["auto", "disabled"] | None = "disabled"
check_cache_salt_support ¶
Source code in vllm/entrypoints/openai/responses/protocol.py
function_call_parsing ¶
Parse function_call dictionaries into ResponseFunctionToolCall objects. This ensures Pydantic can properly resolve union types in the input field. Function calls provided as dicts are converted to ResponseFunctionToolCall objects before validation, while invalid structures are left for Pydantic to reject with appropriate error messages.
Source code in vllm/entrypoints/openai/responses/protocol.py
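A rough sketch of the idea (not the exact validator): items of type "function_call" supplied as plain dicts are coerced into ResponseFunctionToolCall, and anything that fails coercion is left untouched for Pydantic to reject.
# Sketch only; the real validator lives in the linked source file.
from openai.types.responses import ResponseFunctionToolCall

def coerce_function_calls(items: list) -> list:
    coerced = []
    for item in items:
        if isinstance(item, dict) and item.get("type") == "function_call":
            try:
                item = ResponseFunctionToolCall(**item)
            except Exception:
                pass  # leave invalid structures for Pydantic to report
        coerced.append(item)
    return coerced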
is_include_output_logprobs ¶
is_include_output_logprobs() -> bool
Check if the request includes output logprobs.
Source code in vllm/entrypoints/openai/responses/protocol.py
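One plausible way to express this check, given the include literals listed above (the actual code is in the linked source):
# Plausible sketch, not necessarily the exact implementation.
def is_include_output_logprobs(self) -> bool:
    return (self.include is not None
            and "message.output_text.logprobs" in self.include)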
to_sampling_params ¶
to_sampling_params(
default_max_tokens: int,
default_sampling_params: dict | None = None,
) -> SamplingParams
Source code in vllm/entrypoints/openai/responses/protocol.py
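Typical usage converts a validated request into vLLM SamplingParams before scheduling; the values below are placeholders, not vLLM's defaults:
# Hypothetical call; `request` is a ResponsesRequest.
sampling_params = request.to_sampling_params(
    default_max_tokens=1024,
    default_sampling_params={"temperature": 0.7},
)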
validate_background ¶
Source code in vllm/entrypoints/openai/responses/protocol.py
validate_prompt ¶
ResponsesResponse ¶
Bases: OpenAIBaseModel
Source code in vllm/entrypoints/openai/responses/protocol.py
created_at class-attribute instance-attribute ¶
id class-attribute instance-attribute ¶
id: str = Field(
default_factory=lambda: f"resp_{random_uuid()}"
)
incomplete_details class-attribute instance-attribute ¶
input_messages class-attribute instance-attribute ¶
input_messages: ResponseInputOutputMessage | None = Field(
default=None,
description="If enable_response_messages, we can show raw token input to model.",
)
output_messages class-attribute instance-attribute ¶
output_messages: ResponseInputOutputMessage | None = Field(
default=None,
description="If enable_response_messages, we can show raw token output of model.",
)
service_tier instance-attribute ¶
service_tier: Literal[
"auto", "default", "flex", "scale", "priority"
]
from_request classmethod ¶
from_request(
request: ResponsesRequest,
sampling_params: SamplingParams,
model_name: str,
created_time: int,
output: list[ResponseOutputItem],
status: ResponseStatus,
usage: ResponseUsage | None = None,
input_messages: ResponseInputOutputMessage
| None = None,
output_messages: ResponseInputOutputMessage
| None = None,
) -> ResponsesResponse
Source code in vllm/entrypoints/openai/responses/protocol.py
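A hedged example of building a response once generation has finished; request, params, items, and usage are placeholders produced earlier in the serving path:
# Hypothetical construction; all lowercase names are placeholders.
import time

response = ResponsesResponse.from_request(
    request=request,
    sampling_params=params,
    model_name="my-model",
    created_time=int(time.time()),
    output=items,
    status="completed",
    usage=usage,
)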
serialize_input_messages ¶
serialize_message ¶
Serializes a single message.