vllm.lora.utils ¶
   _all_lora_classes  module-attribute  ¶
 _all_lora_classes: set[type[BaseLayerWithLoRA]] = {
    VocabParallelEmbeddingWithLoRA,
    ColumnParallelLinearWithLoRA,
    MergedColumnParallelLinearWithLoRA,
    QKVParallelLinearWithLoRA,
    MergedQKVParallelLinearWithLoRA,
    RowParallelLinearWithLoRA,
    ReplicatedLinearWithLoRA,
    LogitsProcessorWithLoRA,
    ColumnParallelLinearWithShardedLoRA,
    QKVParallelLinearWithShardedLoRA,
    MergedColumnParallelLinearWithShardedLoRA,
    MergedQKVParallelLinearWithShardedLoRA,
    RowParallelLinearWithShardedLoRA,
    FusedMoEWithLoRA,
}
  from_layer ¶
 from_layer(
    layer: Module,
    max_loras: int,
    lora_config: LoRAConfig,
    packed_modules_list: list,
    model_config: PretrainedConfig | None = None,
) -> Module
Source code in vllm/lora/utils.py
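`from_layer` presumably iterates over `_all_lora_classes` and selects the wrapper class whose replacement check matches the given layer. A dependency-free sketch of that registry-dispatch pattern (all names here are hypothetical, not the real vLLM API):

```python
class BaseWrapper:
    @classmethod
    def can_replace(cls, layer) -> bool:
        # Hypothetical hook, standing in for the per-class check
        # that the real LoRA wrapper classes perform.
        return False


class LinearWrapper(BaseWrapper):
    @classmethod
    def can_replace(cls, layer) -> bool:
        return type(layer).__name__ == "Linear"


# Stand-in for _all_lora_classes.
_registry = {LinearWrapper}


def from_layer(layer):
    """Return the first matching wrapper class, or the layer unchanged."""
    for wrapper_cls in _registry:
        if wrapper_cls.can_replace(layer):
            # The real helper would instantiate the wrapper around `layer`.
            return wrapper_cls
    return layer  # no wrapper matched; leave the layer as-is
```

The real function additionally threads `max_loras`, `lora_config`, `packed_modules_list`, and `model_config` through to the wrapper.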
   from_layer_logits_processor ¶
 from_layer_logits_processor(
    layer: LogitsProcessor,
    lm_head: ParallelLMHead,
    max_loras: int,
    lora_config: LoRAConfig,
    model_config: PretrainedConfig | None = None,
) -> LogitsProcessorWithLoRA
Source code in vllm/lora/utils.py
   get_adapter_absolute_path ¶
  Resolves the given lora_path to an absolute local path.
If the lora_path is identified as a Hugging Face model identifier, it will download the model and return the local snapshot path. Otherwise, it treats the lora_path as a local file path and converts it to an absolute path.
Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| `lora_path` | `str` | The path to the LoRA model, which can be an absolute path, a relative path, or a Hugging Face model identifier. | *required* |

Returns: `str`: The resolved absolute local path to the LoRA model.
Source code in vllm/lora/utils.py
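The resolution order described above can be sketched without vLLM or `huggingface_hub` installed; this is a simplified re-implementation for illustration, and the Hugging Face download step (which the real helper performs via a snapshot download) is elided:

```python
import os
import re

# Rough "org/name" shape of a Hugging Face repo id (illustrative only).
HF_REPO_ID_RE = re.compile(r"^[\w.-]+/[\w.-]+$")


def resolve_lora_path(lora_path: str) -> str:
    """Return an absolute local path for a LoRA adapter reference."""
    if os.path.isabs(lora_path):
        return lora_path
    if os.path.exists(lora_path):
        # A relative local path: make it absolute.
        return os.path.abspath(lora_path)
    if HF_REPO_ID_RE.match(lora_path):
        # Looks like a Hugging Face identifier; the real helper would
        # download the snapshot and return its local path here.
        raise NotImplementedError("download elided in this sketch")
    return os.path.abspath(lora_path)
```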
   get_supported_lora_modules ¶
  In vLLM, all linear layers support LoRA.
Source code in vllm/lora/utils.py
   is_moe_model ¶
  Checks if the model contains FusedMoE layers and warns the user.
Source code in vllm/lora/utils.py
   is_regex_target_modules ¶
PEFT supports passing `target_modules` as a regular expression, such as `model.*(q_proj|k_proj|v_proj)$`. This function is mainly used to determine whether the suffixes named in the regular expression are present in `expected_lora_modules`.
Source code in vllm/lora/utils.py
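The suffix check can be sketched as follows; this is a hypothetical simplification (function name and exact extraction rule are assumptions), handling only the common alternation-at-the-end form shown above:

```python
import re


def regex_suffixes_supported(target_regex: str,
                             expected_lora_modules: list[str]) -> bool:
    """Extract the trailing (a|b|c) alternation from a PEFT-style
    target_modules regex and check every suffix is an expected module."""
    m = re.search(r"\(([^)]+)\)\$?$", target_regex)
    if m is None:
        # Not in the expected "...(x|y|z)$" shape.
        return False
    suffixes = m.group(1).split("|")
    return all(s in expected_lora_modules for s in suffixes)
```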
   parse_fine_tuned_lora_name ¶
 parse_fine_tuned_lora_name(
    name: str,
    weights_mapper: Optional[WeightsMapper] = None,
) -> tuple[str, bool]
Parse the name of LoRA weights.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| `name` | `str` | the name of the fine-tuned LoRA, e.g. `base_model.model.dense1.weight` | *required* |
| `weights_mapper` | `Optional[WeightsMapper]` | maps the name of the weight | `None` |

Returns: `tuple[str, bool]`: `(module_name, is_lora_a)`, where `module_name` is the name of the module, e.g. `model.dense1`, and `is_lora_a` indicates whether the tensor is `lora_a` or `lora_b`.
Source code in vllm/lora/utils.py
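A minimal sketch of the parsing rule implied by the example names above (the real function also applies `weights_mapper` and handles more weight layouts; this simplification assumes PEFT-style `base_model.model.` prefixes and `lora_A`/`lora_B` components):

```python
def parse_lora_name(name: str) -> tuple[str, bool]:
    """Return (module_name, is_lora_a) for a fine-tuned LoRA weight name."""
    parts = name.split(".")
    # Strip the PEFT "base_model.model." prefix if present.
    if parts[:2] == ["base_model", "model"]:
        parts = parts[2:]
    # Drop the trailing tensor component, e.g. "weight".
    if parts[-1] in ("weight", "bias"):
        parts = parts[:-1]
    # The remaining tail says which half of the LoRA pair this is.
    is_lora_a = parts[-1] == "lora_A"
    if parts[-1] in ("lora_A", "lora_B"):
        parts = parts[:-1]
    return ".".join(parts), is_lora_a
```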
   process_packed_modules_mapping ¶
  Source code in vllm/lora/utils.py
   replace_submodule ¶
  Replace a submodule in a model with a new module.
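The replace-by-dotted-name pattern can be sketched without PyTorch; the real helper operates on `torch.nn.Module` (where `get_submodule` resolves the parent), while plain attribute access is used here to keep the example dependency-free:

```python
def replace_submodule(model, target: str, new_module):
    """Replace the submodule at dotted path `target` and return it."""
    *parent_names, leaf = target.split(".")
    parent = model
    # Walk down to the immediate parent of the target submodule.
    for name in parent_names:
        parent = getattr(parent, name)
    setattr(parent, leaf, new_module)
    return new_module
```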