vllm.plugins.io_processors.interface ¶

IOProcessorInput `module-attribute` ¶

# Type variable for the plugin-specific input type; used as the first
# parameter of Generic[...] in the IOProcessor base class.
IOProcessorInput = TypeVar('IOProcessorInput')

IOProcessorOutput `module-attribute` ¶

# Type variable for the plugin-specific output type; used as the second
# parameter of Generic[...] in the IOProcessor base class.
IOProcessorOutput = TypeVar('IOProcessorOutput')

IOProcessor ¶

Bases: ABC, Generic[IOProcessorInput, IOProcessorOutput]

Source code in vllm/plugins/io_processors/interface.py

class IOProcessor(ABC, Generic[IOProcessorInput, IOProcessorOutput]):
    """Abstract base class for IO processor plugins.

    The class is generic over two type variables:

    * ``IOProcessorInput`` - the type ``parse_request`` returns and
      ``pre_process`` consumes.
    * ``IOProcessorOutput`` - the type ``post_process`` returns and
      ``output_to_response`` consumes.

    Subclasses must implement ``pre_process``, ``post_process``,
    ``parse_request`` and ``output_to_response``. The ``*_async`` variants
    and ``validate_or_generate_params`` ship with default implementations.
    """

    def __init__(self, vllm_config: VllmConfig):
        """Store the given ``vllm_config`` on the instance."""
        self.vllm_config = vllm_config

    @abstractmethod
    def pre_process(
        self,
        prompt: IOProcessorInput,
        request_id: str | None = None,
        **kwargs,
    ) -> PromptType | Sequence[PromptType]:
        """Turn a plugin input into one or more prompts (abstract).

        Args:
            prompt: Plugin input of the type this processor is
                parameterized with.
            request_id: Optional request identifier; defaults to ``None``.
            **kwargs: Extra keyword arguments; not interpreted here.

        Returns:
            Per the annotation, a single ``PromptType`` or a sequence of
            them.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError

    async def pre_process_async(
        self,
        prompt: IOProcessorInput,
        request_id: str | None = None,
        **kwargs,
    ) -> PromptType | Sequence[PromptType]:
        """Coroutine wrapper around :meth:`pre_process`.

        The default implementation calls ``pre_process`` synchronously with
        the same arguments and returns its result unchanged.
        """
        prompts = self.pre_process(prompt, request_id, **kwargs)
        return prompts

    @abstractmethod
    def post_process(
        self,
        model_output: Sequence[PoolingRequestOutput],
        request_id: str | None = None,
        **kwargs,
    ) -> IOProcessorOutput:
        """Turn a sequence of model outputs into the plugin output (abstract).

        Args:
            model_output: Sequence of ``PoolingRequestOutput`` items.
            request_id: Optional request identifier; defaults to ``None``.
            **kwargs: Extra keyword arguments; not interpreted here.

        Returns:
            A value of the processor's output type.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError

    async def post_process_async(
        self,
        model_output: AsyncGenerator[tuple[int, PoolingRequestOutput]],
        request_id: str | None = None,
        **kwargs,
    ) -> IOProcessorOutput:
        """Drain ``model_output``, order it by index, then post-process it.

        Args:
            model_output: Async stream of ``(index, output)`` pairs.
            request_id: Optional request identifier, forwarded to
                ``post_process``.
            **kwargs: Forwarded unchanged to ``post_process``.

        Returns:
            Whatever ``post_process`` returns for the reordered outputs.
        """
        # Outputs are not guaranteed to arrive in the order the prompts were
        # fed to vLLM, so gather every (index, output) pair first ...
        indexed_outputs = []
        async for index, output in model_output:
            indexed_outputs.append((index, output))

        # ... then restore the order by index. Only the index is used as the
        # sort key, so the outputs themselves are never compared.
        indexed_outputs.sort(key=lambda pair: pair[0])

        ordered_outputs = [output for _, output in indexed_outputs]
        return self.post_process(ordered_outputs, request_id, **kwargs)

    @abstractmethod
    def parse_request(
        self,
        request: Any,
    ) -> IOProcessorInput:
        """Build the processor's input type from ``request`` (abstract).

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError

    def validate_or_generate_params(
        self, params: SamplingParams | PoolingParams | None = None
    ) -> SamplingParams | PoolingParams:
        """Return ``params`` if it is truthy, else a default ``PoolingParams()``.

        With the default ``params=None`` a fresh ``PoolingParams()`` is
        returned.
        """
        if params:
            return params
        return PoolingParams()

    @abstractmethod
    def output_to_response(
        self,
        plugin_output: IOProcessorOutput,
    ) -> IOProcessorResponse:
        """Wrap a plugin output in an ``IOProcessorResponse`` (abstract).

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError

vllm_config `instance-attribute` ¶

vllm_config = vllm_config

__init__ ¶

__init__(vllm_config: VllmConfig)

Source code in vllm/plugins/io_processors/interface.py

def __init__(self, vllm_config: VllmConfig):
    """Store the given ``vllm_config`` on the instance as ``self.vllm_config``."""
    self.vllm_config = vllm_config

output_to_response `abstractmethod` ¶

output_to_response(
    plugin_output: IOProcessorOutput,
) -> IOProcessorResponse

Source code in vllm/plugins/io_processors/interface.py

@abstractmethod
def output_to_response(
    self,
    plugin_output: IOProcessorOutput,
) -> IOProcessorResponse:
    """Wrap a plugin output in an ``IOProcessorResponse`` (abstract).

    Args:
        plugin_output: A value of the processor's output type.

    Returns:
        Per the annotation, an ``IOProcessorResponse``.

    Raises:
        NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError

parse_request `abstractmethod` ¶

parse_request(request: Any) -> IOProcessorInput

Source code in vllm/plugins/io_processors/interface.py

@abstractmethod
def parse_request(
    self,
    request: Any,
) -> IOProcessorInput:
    """Build the processor's input type from ``request`` (abstract).

    Args:
        request: The incoming request object; annotated as ``Any``.

    Returns:
        A value of the processor's input type.

    Raises:
        NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError

post_process `abstractmethod` ¶

post_process(
    model_output: Sequence[PoolingRequestOutput],
    request_id: str | None = None,
    **kwargs,
) -> IOProcessorOutput

Source code in vllm/plugins/io_processors/interface.py

@abstractmethod
def post_process(
    self,
    model_output: Sequence[PoolingRequestOutput],
    request_id: str | None = None,
    **kwargs,
) -> IOProcessorOutput:
    """Turn a sequence of model outputs into the plugin output (abstract).

    Args:
        model_output: Sequence of ``PoolingRequestOutput`` items.
        request_id: Optional request identifier; defaults to ``None``.
        **kwargs: Extra keyword arguments; not interpreted here.

    Returns:
        A value of the processor's output type.

    Raises:
        NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError

post_process_async `async` ¶

post_process_async(
    model_output: AsyncGenerator[
        tuple[int, PoolingRequestOutput]
    ],
    request_id: str | None = None,
    **kwargs,
) -> IOProcessorOutput

Source code in vllm/plugins/io_processors/interface.py

async def post_process_async(
    self,
    model_output: AsyncGenerator[tuple[int, PoolingRequestOutput]],
    request_id: str | None = None,
    **kwargs,
) -> IOProcessorOutput:
    """Drain ``model_output``, order it by index, then post-process it.

    Args:
        model_output: Async stream of ``(index, output)`` pairs.
        request_id: Optional request identifier, forwarded to
            ``post_process``.
        **kwargs: Forwarded unchanged to ``post_process``.

    Returns:
        Whatever ``self.post_process`` returns for the reordered outputs.
    """
    # Outputs are not guaranteed to arrive in the order the prompts were
    # fed to vLLM, so gather every (index, output) pair first ...
    indexed_outputs = []
    async for index, output in model_output:
        indexed_outputs.append((index, output))

    # ... then restore the order by index. Only the index is used as the
    # sort key, so the outputs themselves are never compared.
    indexed_outputs.sort(key=lambda pair: pair[0])

    ordered_outputs = [output for _, output in indexed_outputs]
    return self.post_process(ordered_outputs, request_id, **kwargs)

pre_process `abstractmethod` ¶

pre_process(
    prompt: IOProcessorInput,
    request_id: str | None = None,
    **kwargs,
) -> PromptType | Sequence[PromptType]

Source code in vllm/plugins/io_processors/interface.py

@abstractmethod
def pre_process(
    self,
    prompt: IOProcessorInput,
    request_id: str | None = None,
    **kwargs,
) -> PromptType | Sequence[PromptType]:
    """Turn a plugin input into one or more prompts (abstract).

    Args:
        prompt: Plugin input of the type this processor is parameterized
            with.
        request_id: Optional request identifier; defaults to ``None``.
        **kwargs: Extra keyword arguments; not interpreted here.

    Returns:
        Per the annotation, a single ``PromptType`` or a sequence of them.

    Raises:
        NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError

pre_process_async `async` ¶

pre_process_async(
    prompt: IOProcessorInput,
    request_id: str | None = None,
    **kwargs,
) -> PromptType | Sequence[PromptType]

Source code in vllm/plugins/io_processors/interface.py

async def pre_process_async(
    self,
    prompt: IOProcessorInput,
    request_id: str | None = None,
    **kwargs,
) -> PromptType | Sequence[PromptType]:
    """Coroutine wrapper around ``self.pre_process``.

    Calls ``self.pre_process`` synchronously with the same arguments and
    returns its result unchanged.

    Args:
        prompt: Plugin input, forwarded as the first positional argument.
        request_id: Optional request identifier, forwarded positionally.
        **kwargs: Forwarded unchanged to ``pre_process``.
    """
    prompts = self.pre_process(prompt, request_id, **kwargs)
    return prompts

validate_or_generate_params ¶

validate_or_generate_params(
    params: SamplingParams | PoolingParams | None = None,
) -> SamplingParams | PoolingParams

Source code in vllm/plugins/io_processors/interface.py

def validate_or_generate_params(
    self, params: SamplingParams | PoolingParams | None = None
) -> SamplingParams | PoolingParams:
    """Return ``params`` if it is truthy, else a default ``PoolingParams()``.

    Args:
        params: Caller-supplied parameters, or ``None`` (the default).

    Returns:
        ``params`` itself when truthy; otherwise a freshly constructed
        ``PoolingParams()``.
    """
    if params:
        return params
    return PoolingParams()

vllm.plugins.io_processors.interface ¶

IOProcessorInput module-attribute ¶

IOProcessorOutput module-attribute ¶

IOProcessor ¶

vllm_config instance-attribute ¶

__init__ ¶

output_to_response abstractmethod ¶

parse_request abstractmethod ¶

post_process abstractmethod ¶

post_process_async async ¶

pre_process abstractmethod ¶

pre_process_async async ¶

validate_or_generate_params ¶

IOProcessorInput `module-attribute` ¶

IOProcessorOutput `module-attribute` ¶

vllm_config `instance-attribute` ¶

__init__ ¶

output_to_response `abstractmethod` ¶

parse_request `abstractmethod` ¶

post_process `abstractmethod` ¶

post_process_async `async` ¶

pre_process `abstractmethod` ¶

pre_process_async `async` ¶