
vllm.attention.backends.registry

Attention backend registry

_OVERRIDES module-attribute

# Runtime override table: maps a backend enum member to a replacement class
# path. Populated by register_backend() and consulted by get_path().
_OVERRIDES: dict[AttentionBackendEnum, str] = {}

logger module-attribute

# Module-level logger used for the _Backend deprecation warnings below.
logger = init_logger(__name__)

AttentionBackendEnum

Bases: Enum

Enumeration of all supported attention backends.

The enum value is the default class path, but this can be overridden at runtime using register_backend().

To get the actual backend class (respecting overrides), use: backend.get_class()

Source code in vllm/attention/backends/registry.py
class AttentionBackendEnum(enum.Enum, metaclass=_AttentionBackendEnumMeta):
    """Registry of all supported attention backends.

    Each member's value is the default fully qualified class path for the
    backend implementation; a different implementation may be installed at
    runtime via register_backend().

    Use backend.get_class() to obtain the concrete backend class with any
    runtime override applied.
    """

    FLASH_ATTN = "vllm.v1.attention.backends.flash_attn.FlashAttentionBackend"
    TRITON_ATTN = "vllm.v1.attention.backends.triton_attn.TritonAttentionBackend"
    XFORMERS = "vllm.v1.attention.backends.xformers.XFormersAttentionBackend"
    ROCM_ATTN = "vllm.v1.attention.backends.rocm_attn.RocmAttentionBackend"
    ROCM_AITER_MLA = "vllm.v1.attention.backends.mla.rocm_aiter_mla.AiterMLABackend"
    ROCM_AITER_FA = "vllm.v1.attention.backends.rocm_aiter_fa.AiterFlashAttentionBackend"
    # Tag only; used solely for ViT.
    TORCH_SDPA = ""
    FLASHINFER = "vllm.v1.attention.backends.flashinfer.FlashInferBackend"
    FLASHINFER_MLA = "vllm.v1.attention.backends.mla.flashinfer_mla.FlashInferMLABackend"
    TRITON_MLA = "vllm.v1.attention.backends.mla.triton_mla.TritonMLABackend"
    CUTLASS_MLA = "vllm.v1.attention.backends.mla.cutlass_mla.CutlassMLABackend"
    FLASHMLA = "vllm.v1.attention.backends.mla.flashmla.FlashMLABackend"
    FLASHMLA_SPARSE = "vllm.v1.attention.backends.mla.flashmla_sparse.FlashMLASparseBackend"
    FLASH_ATTN_MLA = "vllm.v1.attention.backends.mla.flashattn_mla.FlashAttnMLABackend"
    PALLAS = "vllm.v1.attention.backends.pallas.PallasAttentionBackend"
    IPEX = "vllm.v1.attention.backends.ipex.IpexAttentionBackend"
    NO_ATTENTION = "vllm.v1.attention.backends.no_attention.NoAttentionBackend"
    FLEX_ATTENTION = "vllm.v1.attention.backends.flex_attention.FlexAttentionBackend"
    TREE_ATTN = "vllm.v1.attention.backends.tree_attn.TreeAttentionBackend"
    ROCM_AITER_UNIFIED_ATTN = (
        "vllm.v1.attention.backends.rocm_aiter_unified_attn."
        "RocmAiterUnifiedAttentionBackend"
    )
    CPU_ATTN = "vllm.v1.attention.backends.cpu_attn.CPUAttentionBackend"
    # Placeholder for third-party/custom backends - must be registered before use
    CUSTOM = ""

    def get_path(self, include_classname: bool = True) -> str:
        """Return the class path for this backend, honoring overrides.

        Args:
            include_classname: When False, strip the trailing class name
                and return only the module path.

        Returns:
            The fully qualified class path string

        Raises:
            ValueError: If Backend.CUSTOM is used without being registered
        """
        resolved = _OVERRIDES.get(self, self.value)
        if not resolved:
            raise ValueError(
                f"Backend {self.name} must be registered before use. "
                f"Use register_backend(Backend.{self.name}, 'your.module.YourClass')"
            )
        return resolved if include_classname else resolved.rsplit(".", 1)[0]

    def get_class(self) -> "type[AttentionBackend]":
        """Import and return the backend class, honoring overrides.

        Returns:
            The backend class

        Raises:
            ImportError: If the backend class cannot be imported
            ValueError: If Backend.CUSTOM is used without being registered
        """
        return resolve_obj_by_qualname(self.get_path())

    def is_overridden(self) -> bool:
        """Return True when a runtime override is registered for this backend."""
        return self in _OVERRIDES

    def clear_override(self) -> None:
        """Drop any runtime override so the default class path is used again."""
        _OVERRIDES.pop(self, None)

CPU_ATTN class-attribute instance-attribute

CPU_ATTN = "vllm.v1.attention.backends.cpu_attn.CPUAttentionBackend"

CUSTOM class-attribute instance-attribute

CUSTOM = ''

CUTLASS_MLA class-attribute instance-attribute

CUTLASS_MLA = "vllm.v1.attention.backends.mla.cutlass_mla.CutlassMLABackend"

FLASHINFER class-attribute instance-attribute

FLASHINFER = "vllm.v1.attention.backends.flashinfer.FlashInferBackend"

FLASHINFER_MLA class-attribute instance-attribute

FLASHINFER_MLA = "vllm.v1.attention.backends.mla.flashinfer_mla.FlashInferMLABackend"

FLASHMLA class-attribute instance-attribute

FLASHMLA = "vllm.v1.attention.backends.mla.flashmla.FlashMLABackend"

FLASHMLA_SPARSE class-attribute instance-attribute

FLASHMLA_SPARSE = "vllm.v1.attention.backends.mla.flashmla_sparse.FlashMLASparseBackend"

FLASH_ATTN class-attribute instance-attribute

FLASH_ATTN = "vllm.v1.attention.backends.flash_attn.FlashAttentionBackend"

FLASH_ATTN_MLA class-attribute instance-attribute

FLASH_ATTN_MLA = "vllm.v1.attention.backends.mla.flashattn_mla.FlashAttnMLABackend"

FLEX_ATTENTION class-attribute instance-attribute

FLEX_ATTENTION = "vllm.v1.attention.backends.flex_attention.FlexAttentionBackend"

IPEX class-attribute instance-attribute

IPEX = (
    "vllm.v1.attention.backends.ipex.IpexAttentionBackend"
)

NO_ATTENTION class-attribute instance-attribute

NO_ATTENTION = "vllm.v1.attention.backends.no_attention.NoAttentionBackend"

PALLAS class-attribute instance-attribute

PALLAS = "vllm.v1.attention.backends.pallas.PallasAttentionBackend"

ROCM_AITER_FA class-attribute instance-attribute

ROCM_AITER_FA = "vllm.v1.attention.backends.rocm_aiter_fa.AiterFlashAttentionBackend"

ROCM_AITER_MLA class-attribute instance-attribute

ROCM_AITER_MLA = "vllm.v1.attention.backends.mla.rocm_aiter_mla.AiterMLABackend"

ROCM_AITER_UNIFIED_ATTN class-attribute instance-attribute

ROCM_AITER_UNIFIED_ATTN = "vllm.v1.attention.backends.rocm_aiter_unified_attn.RocmAiterUnifiedAttentionBackend"

ROCM_ATTN class-attribute instance-attribute

ROCM_ATTN = "vllm.v1.attention.backends.rocm_attn.RocmAttentionBackend"

TORCH_SDPA class-attribute instance-attribute

TORCH_SDPA = ''

TREE_ATTN class-attribute instance-attribute

TREE_ATTN = "vllm.v1.attention.backends.tree_attn.TreeAttentionBackend"

TRITON_ATTN class-attribute instance-attribute

TRITON_ATTN = "vllm.v1.attention.backends.triton_attn.TritonAttentionBackend"

TRITON_MLA class-attribute instance-attribute

TRITON_MLA = "vllm.v1.attention.backends.mla.triton_mla.TritonMLABackend"

XFORMERS class-attribute instance-attribute

XFORMERS = "vllm.v1.attention.backends.xformers.XFormersAttentionBackend"

clear_override

clear_override() -> None

Clear any override for this backend, reverting to the default.

Source code in vllm/attention/backends/registry.py
def clear_override(self) -> None:
    """Clear any override for this backend, reverting to the default."""
    # pop() with a default is a no-op when no override was registered.
    _OVERRIDES.pop(self, None)

get_class

get_class() -> type[AttentionBackend]

Get the backend class (respects overrides).

Returns:

Type Description
type[AttentionBackend]

The backend class

Raises:

Type Description
ImportError

If the backend class cannot be imported

ValueError

If Backend.CUSTOM is used without being registered

Source code in vllm/attention/backends/registry.py
def get_class(self) -> "type[AttentionBackend]":
    """Get the backend class (respects overrides).

    Returns:
        The backend class

    Raises:
        ImportError: If the backend class cannot be imported
        ValueError: If Backend.CUSTOM is used without being registered
    """
    # get_path() applies any registered override and raises ValueError for
    # unregistered placeholder backends (e.g. CUSTOM).
    return resolve_obj_by_qualname(self.get_path())

get_path

get_path(include_classname: bool = True) -> str

Get the class path for this backend (respects overrides).

Returns:

Type Description
str

The fully qualified class path string

Raises:

Type Description
ValueError

If Backend.CUSTOM is used without being registered

Source code in vllm/attention/backends/registry.py
def get_path(self, include_classname: bool = True) -> str:
    """Get the class path for this backend (respects overrides).

    Args:
        include_classname: When False, drop the final ``.ClassName``
            component and return only the module path.

    Returns:
        The fully qualified class path string

    Raises:
        ValueError: If Backend.CUSTOM is used without being registered
    """
    path = _OVERRIDES.get(self, self.value)
    if not path:
        raise ValueError(
            f"Backend {self.name} must be registered before use. "
            f"Use register_backend(Backend.{self.name}, 'your.module.YourClass')"
        )
    # Strip the class name (the last dot-separated component) if requested.
    if not include_classname:
        path = path.rsplit(".", 1)[0]
    return path

is_overridden

is_overridden() -> bool

Check if this backend has been overridden.

Returns:

Type Description
bool

True if the backend has a registered override

Source code in vllm/attention/backends/registry.py
def is_overridden(self) -> bool:
    """Check if this backend has been overridden.

    Returns:
        True if the backend has a registered override
    """
    # Membership in _OVERRIDES is the single source of truth for overrides.
    return self in _OVERRIDES

_AttentionBackendEnumMeta

Bases: EnumMeta

Metaclass for AttentionBackendEnum to provide better error messages.

Source code in vllm/attention/backends/registry.py
class _AttentionBackendEnumMeta(enum.EnumMeta):
    """Metaclass for AttentionBackendEnum that raises a descriptive error
    (listing every valid backend name) on unknown lookups."""

    def __getitem__(cls, name: str):
        """Look up a backend member by name, with a helpful failure message."""
        try:
            return super().__getitem__(name)
        except KeyError:
            members = cast("dict[str, AttentionBackendEnum]", cls.__members__)
            valid_backends = ", ".join(m.name for m in members.values())
            # 'from None' hides the KeyError so only the friendly error shows.
            raise ValueError(
                f"Unknown attention backend: '{name}'. "
                f"Valid options are: {valid_backends}"
            ) from None

__getitem__

__getitem__(name: str)

Get backend by name with helpful error messages.

Source code in vllm/attention/backends/registry.py
def __getitem__(cls, name: str):
    """Get backend by name with helpful error messages."""
    try:
        return super().__getitem__(name)
    except KeyError:
        members = cast("dict[str, AttentionBackendEnum]", cls.__members__).values()
        valid_backends = ", ".join(m.name for m in members)
        # 'from None' suppresses the KeyError context so only the friendly
        # ValueError (with the list of valid names) surfaces to the caller.
        raise ValueError(
            f"Unknown attention backend: '{name}'. "
            f"Valid options are: {valid_backends}"
        ) from None

_Backend

Deprecated: Use AttentionBackendEnum instead.

This class is provided for backwards compatibility with plugins and will be removed in a future release.

Source code in vllm/attention/backends/registry.py
class _Backend(metaclass=_BackendMeta):
    """Deprecated: Use AttentionBackendEnum instead.

    Provided only for backwards compatibility with existing plugins;
    scheduled for removal in a future release. All attribute and item
    access is redirected to AttentionBackendEnum by the metaclass.
    """

_BackendMeta

Bases: type

Metaclass to provide deprecation warnings when accessing _Backend.

Source code in vllm/attention/backends/registry.py
class _BackendMeta(type):
    """Metaclass that logs a deprecation warning whenever the legacy
    _Backend alias is used, then forwards the access to AttentionBackendEnum."""

    def __getattribute__(cls, name: str):
        # Basic introspection attributes are forwarded silently to avoid
        # spurious warnings; everything else logs the deprecation notice.
        if name in ("__class__", "__mro__", "__name__"):
            return getattr(AttentionBackendEnum, name)
        logger.warning(
            "_Backend has been renamed to AttentionBackendEnum. "
            "Please update your code to use AttentionBackendEnum instead. "
            "_Backend will be removed in a future release."
        )
        return getattr(AttentionBackendEnum, name)

    def __getitem__(cls, name: str):
        # _Backend["NAME"] behaves like AttentionBackendEnum["NAME"], with
        # a deprecation warning emitted first.
        logger.warning(
            "_Backend has been renamed to AttentionBackendEnum. "
            "Please update your code to use AttentionBackendEnum instead. "
            "_Backend will be removed in a future release."
        )
        return AttentionBackendEnum[name]

__getattribute__

__getattribute__(name: str)
Source code in vllm/attention/backends/registry.py
def __getattribute__(cls, name: str):
    # Skip the warning for basic introspection attributes; note that even
    # these are still forwarded to AttentionBackendEnum below.
    if name not in ("__class__", "__mro__", "__name__"):
        logger.warning(
            "_Backend has been renamed to AttentionBackendEnum. "
            "Please update your code to use AttentionBackendEnum instead. "
            "_Backend will be removed in a future release."
        )
    return getattr(AttentionBackendEnum, name)

__getitem__

__getitem__(name: str)
Source code in vllm/attention/backends/registry.py
def __getitem__(cls, name: str):
    # Warn, then delegate the item lookup to AttentionBackendEnum so
    # _Backend["NAME"] keeps working for legacy callers.
    logger.warning(
        "_Backend has been renamed to AttentionBackendEnum. "
        "Please update your code to use AttentionBackendEnum instead. "
        "_Backend will be removed in a future release."
    )
    return AttentionBackendEnum[name]

register_backend

register_backend(
    backend: AttentionBackendEnum,
    class_path: str | None = None,
) -> Callable[[type], type]

Register or override a backend implementation.

Parameters:

Name Type Description Default
backend AttentionBackendEnum

The AttentionBackendEnum member to register

required
class_path str | None

Optional class path. If not provided and used as decorator, will be auto-generated from the class.

None

Returns:

Type Description
Callable[[type], type]

Decorator function if class_path is None, otherwise a no-op

Examples:

Override an existing backend

@register_backend(AttentionBackendEnum.FLASH_ATTN)
class MyCustomFlashAttn: ...

Register a custom third-party backend

@register_backend(AttentionBackendEnum.CUSTOM)
class MyCustomBackend: ...

Direct registration

register_backend(AttentionBackendEnum.CUSTOM, "my.module.MyCustomBackend")

Source code in vllm/attention/backends/registry.py
def register_backend(
    backend: AttentionBackendEnum, class_path: str | None = None
) -> Callable[[type], type]:
    """Register or override a backend implementation.

    Args:
        backend: The AttentionBackendEnum member to register
        class_path: Optional class path. If not provided and used as
            decorator, will be auto-generated from the class.

    Returns:
        Decorator function if class_path is None, otherwise a no-op

    Examples:
        # Override an existing backend
        @register_backend(AttentionBackendEnum.FLASH_ATTN)
        class MyCustomFlashAttn:
            ...

        # Register a custom third-party backend
        @register_backend(AttentionBackendEnum.CUSTOM)
        class MyCustomBackend:
            ...

        # Direct registration
        register_backend(
            AttentionBackendEnum.CUSTOM,
            "my.module.MyCustomBackend"
        )
    """
    # Direct registration: record the path immediately and hand back an
    # identity decorator so both call styles share one signature.
    if class_path is not None:
        _OVERRIDES[backend] = class_path
        return lambda x: x

    # Decorator form: derive the class path from the decorated class.
    def decorator(target: type) -> type:
        _OVERRIDES[backend] = f"{target.__module__}.{target.__qualname__}"
        return target

    return decorator