vllm.model_executor.layers.quantization.utils.bitblas_utils ¶
BITBLAS_OPTIMIZE_FEATURES module-attribute ¶
BITBLAS_OPTIMIZE_FEATURES_CONTIGUOUS module-attribute ¶
    _check_bitblas_supported ¶
 _check_bitblas_supported(
    quant_type: ScalarType,
    group_size: int | None,
    has_zp: bool,
    device_capability: int | None = None,
) -> tuple[bool, str | None]
Source code in vllm/model_executor/layers/quantization/utils/bitblas_utils.py
bitblas_is_k_full ¶
bitblas_make_empty_g_idx ¶
bitblas_make_empty_zp ¶
bitblas_repeat_scales_on_all_ranks ¶
 bitblas_repeat_scales_on_all_ranks(
    act_order: bool, group_size: int, is_row_parallel: bool
) -> bool
Source code in vllm/model_executor/layers/quantization/utils/bitblas_utils.py
bitblas_sort_g_idx ¶
check_bitblas_supported ¶
 check_bitblas_supported(
    quant_type: ScalarType,
    group_size: int,
    has_zp: bool = False,
    device_capability: int | None = None,
) -> bool
Source code in vllm/model_executor/layers/quantization/utils/bitblas_utils.py
    check_bitblas_supports_shape ¶
 check_bitblas_supports_shape(
    output_size_per_partition: int,
    input_size_per_partition: int,
    input_size: int,
    group_size: int,
) -> tuple[bool, str | None]
Source code in vllm/model_executor/layers/quantization/utils/bitblas_utils.py
   query_bitblas_supported_quant_types ¶
  Source code in vllm/model_executor/layers/quantization/utils/bitblas_utils.py
   unpack_gptq_qweight ¶
  Source code in vllm/model_executor/layers/quantization/utils/bitblas_utils.py
   unpack_gptq_qzeros ¶
 unpack_gptq_qzeros(
    qzeros, bits, is_gptq_v2=False
) -> Tensor
Source code in vllm/model_executor/layers/quantization/utils/bitblas_utils.py
   verify_bitblas_supported ¶
 verify_bitblas_supported(
    quant_type: ScalarType,
    group_size: int,
    has_zp: bool = False,
) -> None
Source code in vllm/model_executor/layers/quantization/utils/bitblas_utils.py
verify_bitblas_supports_shape ¶
 verify_bitblas_supports_shape(
    output_size_per_partition: int,
    input_size_per_partition: int,
    input_size: int,
    group_size: int,
) -> None