vllm.beam_search ¶
   BeamSearchInstance ¶
   beams  instance-attribute  ¶
 beams: list[BeamSearchSequence] = [
    BeamSearchSequence(
        tokens=prompt_tokens,
        logprobs=[] if logprobs is None else list(logprobs),
        lora_request=lora_request,
        **kwargs,
    )
]
  __init__ ¶
 __init__(
    prompt_tokens: list[int],
    lora_request: LoRARequest | None = None,
    logprobs: list[dict[int, Logprob]] | None = None,
    **kwargs,
)
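A minimal usage sketch (the prompt token IDs are placeholders): a new instance starts with a single beam that wraps the prompt tokens.

from vllm.beam_search import BeamSearchInstance

# Hypothetical prompt token IDs; in practice these come from the tokenizer.
prompt_tokens = [1, 3087, 2159]

instance = BeamSearchInstance(prompt_tokens)

print(len(instance.beams))       # 1
print(instance.beams[0].tokens)  # [1, 3087, 2159]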
   BeamSearchOutput  dataclass  ¶
 The output of beam search. It contains the list of the best beam search sequences. The length of the list is equal to the beam width.
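A brief consumption sketch, assuming the dataclass exposes the ranked beams as a sequences field holding one BeamSearchSequence per beam (documented below); the tokens and scores are made up.

from vllm.beam_search import BeamSearchOutput, BeamSearchSequence

output = BeamSearchOutput(
    sequences=[
        BeamSearchSequence(tokens=[1, 5, 9], logprobs=[], cum_logprob=-1.2),
        BeamSearchSequence(tokens=[1, 5, 7], logprobs=[], cum_logprob=-1.9),
    ]
)

for rank, seq in enumerate(output.sequences):
    print(rank, seq.cum_logprob, seq.tokens)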
    BeamSearchSequence  dataclass  ¶
A sequence for beam search. It keeps track of the tokens and the cumulative log probability of the sequence. The text field is optional and is only filled when the sequence is about to be returned to the user.
   mm_processor_kwargs  class-attribute instance-attribute  ¶
 mm_processor_kwargs: dict[str, Any] | None = None
    multi_modal_data  class-attribute instance-attribute  ¶
 multi_modal_data: MultiModalDataDict | None = None
  __init__ ¶
 __init__(
    tokens: list[int],
    logprobs: list[dict[int, Logprob]],
    lora_request: LoRARequest | None = None,
    cum_logprob: float = 0.0,
    text: str | None = None,
    finish_reason: str | None = None,
    stop_reason: int | str | None = None,
    multi_modal_data: MultiModalDataDict | None = None,
    mm_processor_kwargs: dict[str, Any] | None = None,
) -> None
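To illustrate how the fields fit together, here is a hedged sketch of extending one beam with a newly sampled token. The token id and log probability are placeholders, and the copy-and-append pattern is only illustrative, not the engine's actual expansion loop.

from vllm.beam_search import BeamSearchSequence

# An existing beam: two token ids, cumulative log probability -0.75 (made up).
beam = BeamSearchSequence(tokens=[1, 42], logprobs=[], cum_logprob=-0.75)

# Hypothetical candidate continuation: token 99 with log probability -0.25.
new_token, new_logprob = 99, -0.25

# A longer candidate sequence; per-token Logprob dicts are omitted for brevity.
extended = BeamSearchSequence(
    tokens=beam.tokens + [new_token],
    logprobs=list(beam.logprobs),
    cum_logprob=beam.cum_logprob + new_logprob,
)

print(extended.tokens, extended.cum_logprob)  # [1, 42, 99] -1.0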
  create_sort_beams_key_function ¶
Create a key function that ranks beams by their length-penalized beam search score (see get_beam_search_score below).
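A hedged usage sketch, assuming the factory takes the EOS token id and the length penalty and returns a key callable suitable for sorted(); the beams below are placeholders.

from vllm.beam_search import (
    BeamSearchSequence,
    create_sort_beams_key_function,
)

# Hypothetical EOS token id 2 and the default length penalty.
sort_key = create_sort_beams_key_function(eos_token_id=2, length_penalty=1.0)

beams = [
    BeamSearchSequence(tokens=[1, 5, 2], logprobs=[], cum_logprob=-2.0),
    BeamSearchSequence(tokens=[1, 7, 9, 2], logprobs=[], cum_logprob=-2.4),
]

# Highest length-penalized score first.
best_first = sorted(beams, key=sort_key, reverse=True)
print([b.tokens for b in best_first])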
     get_beam_search_score ¶
 get_beam_search_score(
    tokens: list[int],
    cumulative_logprob: float,
    eos_token_id: int,
    length_penalty: float = 1.0,
) -> float
Calculate the beam search score with length penalty.
Adapted from
https://github.com/huggingface/transformers/blob/ccb92be23def445f2afdea94c31286f84b89eb5b/src/transformers/generation/beam_search.py#L938
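As a hedged illustration of the scoring rule (following the Hugging Face code linked above, not a verbatim copy of the vLLM source): the cumulative log probability is divided by the sequence length, with a trailing EOS token excluded, raised to length_penalty. The helper name below is made up.

def beam_score_sketch(
    tokens: list[int],
    cumulative_logprob: float,
    eos_token_id: int,
    length_penalty: float = 1.0,
) -> float:
    # A trailing EOS token does not count toward the penalized length.
    seq_len = len(tokens)
    if tokens and tokens[-1] == eos_token_id:
        seq_len -= 1
    return cumulative_logprob / (seq_len ** length_penalty)

# With negative log probabilities, length_penalty > 1.0 favors longer
# sequences and length_penalty < 1.0 favors shorter ones.
print(beam_score_sketch([1, 5, 9, 2], -3.0, eos_token_id=2))                      # -1.0
print(beam_score_sketch([1, 5, 9, 2], -3.0, eos_token_id=2, length_penalty=2.0))  # ~ -0.33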