Skip to content

vllm.transformers_utils.processor

_P module-attribute

# Type variable for processor classes; it is bounded by ProcessorMixin and
# also falls back to ProcessorMixin when left unparameterized.
_P = TypeVar(
    "_P", bound=ProcessorMixin, default=ProcessorMixin
)

_V module-attribute

# Type variable for video processor classes; it is bounded by
# BaseVideoProcessor and also falls back to it when left unparameterized.
_V = TypeVar(
    "_V",
    bound=BaseVideoProcessor,
    default=BaseVideoProcessor,
)

cached_get_feature_extractor module-attribute

# Memoized form of get_feature_extractor: calls with equal (hashable)
# arguments reuse the cached result. No maxsize is given, so lru_cache's
# default bound applies.
cached_get_feature_extractor = lru_cache(
    get_feature_extractor
)

cached_get_image_processor module-attribute

# Memoized form of get_image_processor: calls with equal (hashable)
# arguments reuse the cached result. No maxsize is given, so lru_cache's
# default bound applies.
cached_get_image_processor = lru_cache(get_image_processor)

cached_get_processor module-attribute

# Memoized form of get_processor: calls with equal (hashable) arguments
# reuse the cached result. No maxsize is given, so lru_cache's default
# bound applies.
cached_get_processor = lru_cache(get_processor)

cached_get_video_processor module-attribute

# Memoized form of get_video_processor: calls with equal (hashable)
# arguments reuse the cached result. No maxsize is given, so lru_cache's
# default bound applies.
cached_get_video_processor = lru_cache(get_video_processor)

HashableDict

Bases: dict

A dictionary that can be hashed by lru_cache.

Source code in vllm/transformers_utils/processor.py
class HashableDict(dict):
    """
    A ``dict`` subclass that defines ``__hash__`` so that instances can be
    used as arguments of ``lru_cache``-wrapped functions.

    The hash is derived from the current (key, value) pairs, so every value
    must itself be hashable.
    """

    # NOTE: the builtin dict provides no usable hash; for simplicity the
    # hash is overridden right here instead of introducing a frozen mapping.
    def __hash__(self) -> int:  # type: ignore[override]
        pairs = frozenset(self.items())
        return hash(pairs)

__hash__

__hash__() -> int
Source code in vllm/transformers_utils/processor.py
def __hash__(self) -> int:  # type: ignore[override]
    """Hash the mapping through a frozenset of its (key, value) pairs."""
    pairs = frozenset(self.items())
    return hash(pairs)

HashableList

Bases: list

A list that can be hashed by lru_cache.

Source code in vllm/transformers_utils/processor.py
class HashableList(list):
    """
    A ``list`` subclass that defines ``__hash__`` so that instances can be
    used as arguments of ``lru_cache``-wrapped functions.

    The hash is the hash of a tuple snapshot of the current elements, so
    every element must itself be hashable.
    """

    def __hash__(self) -> int:  # type: ignore[override]
        snapshot = tuple(self)
        return hash(snapshot)

__hash__

__hash__() -> int
Source code in vllm/transformers_utils/processor.py
def __hash__(self) -> int:  # type: ignore[override]
    """Hash the sequence through a tuple snapshot of its elements."""
    snapshot = tuple(self)
    return hash(snapshot)

_collect_dynamic_keys_from_processing_kwargs cached

_collect_dynamic_keys_from_processing_kwargs(
    kwargs_cls: type,
) -> set[str]
Source code in vllm/transformers_utils/processor.py
@lru_cache
def _collect_dynamic_keys_from_processing_kwargs(kwargs_cls: type) -> set[str]:
    """Gather the keyword names declared by a processing-kwargs class.

    The type hints of ``kwargs_cls`` are inspected for the four modality
    groups (``text_kwargs``, ``images_kwargs``, ``videos_kwargs``,
    ``audio_kwargs``). For each group that is present, every name annotated
    on the group's type is collected. The four group names themselves are
    always part of the result.

    Args:
        kwargs_cls: Class whose annotations are inspected; ``None`` yields
            an empty set.

    Returns:
        The collected keyword names. Results are memoized per ``kwargs_cls``.
    """
    if kwargs_cls is None:
        return set()

    group_names = ("text_kwargs", "images_kwargs", "videos_kwargs", "audio_kwargs")
    hints = get_type_hints(kwargs_cls)

    # Start from the group names, then fold in the per-group annotations.
    collected: set[str] = set(group_names)
    for group in group_names:
        if group not in hints:
            continue
        collected.update(get_type_hints(hints[group]))
    return collected

_get_processor_factory_fn

_get_processor_factory_fn(
    processor_cls: type | tuple[type, ...],
)
Source code in vllm/transformers_utils/processor.py
def _get_processor_factory_fn(processor_cls: type | tuple[type, ...]):
    """Pick the callable used to construct a processor for ``processor_cls``.

    Returns ``AutoProcessor.from_pretrained`` when ``processor_cls`` is a
    tuple of classes or is ``ProcessorMixin`` itself. Otherwise returns the
    class's own ``from_pretrained`` if it has one, and the class itself if
    it does not.
    """
    wants_auto = isinstance(processor_cls, tuple) or processor_cls == ProcessorMixin
    if wants_auto:
        return AutoProcessor.from_pretrained

    # Fall back to the class itself when it has no ``from_pretrained``.
    return getattr(processor_cls, "from_pretrained", processor_cls)

_merge_mm_kwargs

_merge_mm_kwargs(
    model_config: ModelConfig,
    processor_cls: type | tuple[type, ...],
    /,
    **kwargs,
)
Source code in vllm/transformers_utils/processor.py
def _wrap_hashable(value: Any) -> Any:
    """Recursively wrap dicts and lists in their hashable counterparts."""
    if isinstance(value, dict):
        return HashableDict({k: _wrap_hashable(v) for k, v in value.items()})
    if isinstance(value, list):
        return HashableList([_wrap_hashable(v) for v in value])
    return value


def _merge_mm_kwargs(
    model_config: "ModelConfig",
    processor_cls: type | tuple[type, ...],
    /,
    **kwargs,
):
    """Merge per-call kwargs with the model's multimodal processor kwargs.

    The caller's ``kwargs`` are merged through
    ``mm_config.merge_mm_processor_kwargs`` and then filtered with
    ``get_allowed_kwarg_only_overrides`` against the factory callable
    selected for ``processor_cls``.

    Args:
        model_config: Supplies the multimodal config used for the merge.
        processor_cls: Processor class (or tuple of classes) whose factory
            decides which kwargs are allowed.
        **kwargs: Per-call overrides.

    Returns:
        A dict of the allowed kwargs in which every dict / list value,
        including nested ones, is wrapped in ``HashableDict`` /
        ``HashableList``.
    """
    mm_config = model_config.get_multimodal_config()
    merged_kwargs = mm_config.merge_mm_processor_kwargs(kwargs)

    factory = _get_processor_factory_fn(processor_cls)
    allowed_kwargs = get_allowed_kwarg_only_overrides(
        factory,
        merged_kwargs,
        requires_kw_only=False,
        allow_var_kwargs=True,
    )
    # NOTE: Pythonic dict/list are not hashable and will raise an unhashable
    # type error when calling `cached_get_processor`, therefore they are
    # wrapped in hashable subclasses. The wrapping is recursive: the hash of
    # a wrapper is computed from its contents, so a plain dict or list nested
    # inside a wrapped value would still fail to hash.
    return {key: _wrap_hashable(value) for key, value in allowed_kwargs.items()}

cached_feature_extractor_from_config

cached_feature_extractor_from_config(
    model_config: ModelConfig, **kwargs: Any
)
Source code in vllm/transformers_utils/processor.py
def cached_feature_extractor_from_config(
    model_config: "ModelConfig",
    **kwargs: Any,
):
    """Return the cached feature extractor for ``model_config``.

    The model name, revision and ``trust_remote_code`` flag are read from
    ``model_config``. ``kwargs`` are merged with the model's multimodal
    processor kwargs before the cached loader is called.
    """
    model_name = model_config.model
    revision = model_config.revision
    trust_remote_code = model_config.trust_remote_code
    overrides = _merge_mm_kwargs(model_config, AutoFeatureExtractor, **kwargs)

    return cached_get_feature_extractor(
        model_name,
        revision=revision,
        trust_remote_code=trust_remote_code,
        **overrides,
    )

cached_get_processor_without_dynamic_kwargs

cached_get_processor_without_dynamic_kwargs(
    processor_name: str,
    *args: Any,
    revision: str | None = None,
    trust_remote_code: bool = False,
    processor_cls: type[_P]
    | tuple[type[_P], ...] = ProcessorMixin,
    **kwargs: Any,
) -> _P
Source code in vllm/transformers_utils/processor.py
def cached_get_processor_without_dynamic_kwargs(
    processor_name: str,
    *args: Any,
    revision: str | None = None,
    trust_remote_code: bool = False,
    processor_cls: type[_P] | tuple[type[_P], ...] = ProcessorMixin,
    **kwargs: Any,
) -> _P:
    """Fetch a cached processor, ignoring kwargs the processor treats as dynamic.

    A processor is first obtained without any extra kwargs and used only to
    discover its dynamic keyword names. Those names are then removed from
    ``kwargs`` and the remaining kwargs are used for the final cached lookup.

    NOTE(review): ``*args`` is accepted but not forwarded to either
    ``cached_get_processor`` call -- confirm whether that is intended.
    """
    # Keyword arguments shared by both lookups. Their order is kept stable
    # because it is part of how the calls are spelled.
    base_kwargs = {
        "revision": revision,
        "trust_remote_code": trust_remote_code,
        "processor_cls": processor_cls,
    }

    # Probe instance built with default kwargs, used only for introspection.
    probe = cached_get_processor(processor_name, **base_kwargs)
    dynamic_keys = get_processor_kwargs_from_processor(probe)

    # Keep only the kwargs that are not reported as dynamic.
    static_kwargs = {}
    for name, value in kwargs.items():
        if name in dynamic_keys:
            continue
        static_kwargs[name] = value

    return cached_get_processor(processor_name, **base_kwargs, **static_kwargs)

cached_image_processor_from_config

cached_image_processor_from_config(
    model_config: ModelConfig, **kwargs: Any
)
Source code in vllm/transformers_utils/processor.py
def cached_image_processor_from_config(
    model_config: "ModelConfig",
    **kwargs: Any,
):
    """Return the cached image processor for ``model_config``.

    The model name, revision and ``trust_remote_code`` flag are read from
    ``model_config``. ``kwargs`` are merged with the model's multimodal
    processor kwargs before the cached loader is called.
    """
    model_name = model_config.model
    revision = model_config.revision
    trust_remote_code = model_config.trust_remote_code
    overrides = _merge_mm_kwargs(model_config, AutoImageProcessor, **kwargs)

    return cached_get_image_processor(
        model_name,
        revision=revision,
        trust_remote_code=trust_remote_code,
        **overrides,
    )

cached_processor_from_config

cached_processor_from_config(
    model_config: ModelConfig,
    processor_cls: type[_P]
    | tuple[type[_P], ...] = ProcessorMixin,
    **kwargs: Any,
) -> _P
Source code in vllm/transformers_utils/processor.py
def cached_processor_from_config(
    model_config: "ModelConfig",
    processor_cls: type[_P] | tuple[type[_P], ...] = ProcessorMixin,
    **kwargs: Any,
) -> _P:
    """Return the cached processor for ``model_config``.

    The model name, revision and ``trust_remote_code`` flag are read from
    ``model_config``. ``kwargs`` are merged with the model's multimodal
    processor kwargs, and the lookup goes through
    ``cached_get_processor_without_dynamic_kwargs``.
    """
    model_name = model_config.model
    revision = model_config.revision
    trust_remote_code = model_config.trust_remote_code
    overrides = _merge_mm_kwargs(model_config, processor_cls, **kwargs)

    return cached_get_processor_without_dynamic_kwargs(
        model_name,
        revision=revision,
        trust_remote_code=trust_remote_code,
        processor_cls=processor_cls,  # type: ignore[arg-type]
        **overrides,
    )

cached_video_processor_from_config

cached_video_processor_from_config(
    model_config: ModelConfig,
    processor_cls: type[_V] | None = None,
    **kwargs: Any,
)
Source code in vllm/transformers_utils/processor.py
def cached_video_processor_from_config(
    model_config: "ModelConfig",
    processor_cls: type[_V] | None = None,
    **kwargs: Any,
):
    """Return the cached video processor for ``model_config``.

    The model name, revision and ``trust_remote_code`` flag are read from
    ``model_config``. ``processor_cls`` is passed on as
    ``processor_cls_overrides``. ``kwargs`` are merged with the model's
    multimodal processor kwargs before the cached loader is called.
    """
    model_name = model_config.model
    revision = model_config.revision
    trust_remote_code = model_config.trust_remote_code
    overrides = _merge_mm_kwargs(model_config, AutoVideoProcessor, **kwargs)

    return cached_get_video_processor(
        model_name,
        revision=revision,
        trust_remote_code=trust_remote_code,
        processor_cls_overrides=processor_cls,  # type: ignore[arg-type]
        **overrides,
    )

get_feature_extractor

get_feature_extractor(
    processor_name: str,
    *args: Any,
    revision: str | None = None,
    trust_remote_code: bool = False,
    **kwargs: Any,
)

Load an audio feature extractor for the given model name via HuggingFace.

Source code in vllm/transformers_utils/processor.py
def get_feature_extractor(
    processor_name: str,
    *args: Any,
    revision: str | None = None,
    trust_remote_code: bool = False,
    **kwargs: Any,
):
    """Load an audio feature extractor for the given model name
    via HuggingFace.

    ``processor_name`` is first passed through ``convert_model_repo_to_path``;
    the extractor is then loaded with ``AutoFeatureExtractor.from_pretrained``.

    Raises:
        RuntimeError: If loading raises ``ValueError`` while
            ``trust_remote_code`` is false; the message suggests enabling it.
        ValueError: Re-raised unchanged when ``trust_remote_code`` is true.
    """
    try:
        processor_name = convert_model_repo_to_path(processor_name)
        loaded = AutoFeatureExtractor.from_pretrained(
            processor_name,
            *args,
            revision=revision,
            trust_remote_code=trust_remote_code,
            **kwargs,
        )
    except ValueError as exc:
        if trust_remote_code:
            raise
        # The ValueError may mean the extractor class does not exist or is
        # not importable, so point the user at the --trust-remote-code flag.
        raise RuntimeError(
            "Failed to load the feature extractor. If the feature "
            "extractor is a custom extractor not yet available in the "
            "HuggingFace transformers library, consider setting "
            "`trust_remote_code=True` in LLM or using the "
            "`--trust-remote-code` flag in the CLI."
        ) from exc

    return cast(FeatureExtractionMixin, loaded)

get_image_processor

get_image_processor(
    processor_name: str,
    *args: Any,
    revision: str | None = None,
    trust_remote_code: bool = False,
    **kwargs: Any,
)

Load an image processor for the given model name via HuggingFace.

Source code in vllm/transformers_utils/processor.py
def get_image_processor(
    processor_name: str,
    *args: Any,
    revision: str | None = None,
    trust_remote_code: bool = False,
    **kwargs: Any,
):
    """Load an image processor for the given model name via HuggingFace.

    ``processor_name`` is first passed through ``convert_model_repo_to_path``;
    the processor is then loaded with ``AutoImageProcessor.from_pretrained``.

    Raises:
        RuntimeError: If loading raises ``ValueError`` while
            ``trust_remote_code`` is false; the message suggests enabling it.
        ValueError: Re-raised unchanged when ``trust_remote_code`` is true.
    """
    try:
        processor_name = convert_model_repo_to_path(processor_name)
        loaded = AutoImageProcessor.from_pretrained(
            processor_name,
            *args,
            revision=revision,
            trust_remote_code=trust_remote_code,
            **kwargs,
        )
    except ValueError as exc:
        if trust_remote_code:
            raise
        # The ValueError may mean the processor class does not exist or is
        # not importable, so point the user at the --trust-remote-code flag.
        raise RuntimeError(
            "Failed to load the image processor. If the image processor is "
            "a custom processor not yet available in the HuggingFace "
            "transformers library, consider setting "
            "`trust_remote_code=True` in LLM or using the "
            "`--trust-remote-code` flag in the CLI."
        ) from exc

    return cast(BaseImageProcessor, loaded)

get_processor

get_processor(
    processor_name: str,
    *args: Any,
    revision: str | None = None,
    trust_remote_code: bool = False,
    processor_cls: type[_P]
    | tuple[type[_P], ...] = ProcessorMixin,
    **kwargs: Any,
) -> _P

Load a processor for the given model name via HuggingFace.

Source code in vllm/transformers_utils/processor.py
def get_processor(
    processor_name: str,
    *args: Any,
    revision: str | None = None,
    trust_remote_code: bool = False,
    processor_cls: type[_P] | tuple[type[_P], ...] = ProcessorMixin,
    **kwargs: Any,
) -> _P:
    """Load a processor for the given model name via HuggingFace.

    A missing ``revision`` is treated as ``"main"``. The processor is built
    in one of three ways:

    * ``AutoProcessor.from_pretrained`` when ``processor_cls`` is a tuple or
      is ``ProcessorMixin`` itself;
    * ``processor_cls.from_pretrained`` when it subclasses ``ProcessorMixin``;
    * ``processor_cls(*args, **kwargs)`` otherwise.

    Raises:
        RuntimeError: If construction raises ``ValueError`` while
            ``trust_remote_code`` is false; the message suggests enabling it.
        ValueError: Re-raised unchanged when ``trust_remote_code`` is true.
        TypeError: If the resulting object is not an instance of
            ``processor_cls``.
    """
    revision = "main" if revision is None else revision

    try:
        processor_name = convert_model_repo_to_path(processor_name)

        # Choose the HF loader; ``None`` marks a standalone class that is
        # unrelated to HF and is instantiated directly.
        if isinstance(processor_cls, tuple) or processor_cls == ProcessorMixin:
            loader = AutoProcessor.from_pretrained
        elif issubclass(processor_cls, ProcessorMixin):
            loader = processor_cls.from_pretrained
        else:
            loader = None

        if loader is None:
            processor = processor_cls(*args, **kwargs)
        else:
            processor = loader(
                processor_name,
                *args,
                revision=revision,
                trust_remote_code=trust_remote_code,
                **kwargs,
            )
    except ValueError as exc:
        if trust_remote_code:
            raise
        # The ValueError may mean the processor class does not exist or is
        # not importable, so point the user at the --trust-remote-code flag.
        raise RuntimeError(
            "Failed to load the processor. If the processor is "
            "a custom processor not yet available in the HuggingFace "
            "transformers library, consider setting "
            "`trust_remote_code=True` in LLM or using the "
            "`--trust-remote-code` flag in the CLI."
        ) from exc

    if isinstance(processor, processor_cls):
        return processor

    raise TypeError(
        "Invalid type of HuggingFace processor. "
        f"Expected type: {processor_cls}, but "
        f"found type: {type(processor)}"
    )

get_processor_kwargs_from_processor cached

get_processor_kwargs_from_processor(
    processor: _P,
) -> set[str]
Source code in vllm/transformers_utils/processor.py
@lru_cache
def get_processor_kwargs_from_processor(processor: _P) -> set[str]:
    """Return the keyword names a processor declares for its ``__call__``.

    If the ``kwargs`` parameter of the processor type's ``__call__`` has an
    annotation, the annotation's first type argument is handed to
    ``_collect_dynamic_keys_from_processing_kwargs``. Otherwise the module
    that defines the processor type is scanned for attributes whose name
    ends with ``ProcessingKwargs`` and the keys collected from each are
    unioned.

    Any exception raised along the way results in an empty set. Results are
    memoized per processor instance.
    """
    processor_type = type(processor)
    try:
        call_params = inspect.signature(processor_type.__call__).parameters
        var_kw = call_params.get("kwargs")
        annotation = None if not var_kw else var_kw.annotation

        if annotation not in (None, inspect.Parameter.empty):
            # Explicit annotation on **kwargs: use its first type argument.
            # NOTE: the helper resolves hints with get_type_hints, which
            # raises when an annotation names something undefined; such
            # failures are absorbed by the broad except below.
            kwargs_cls = get_args(annotation)[0]
            return _collect_dynamic_keys_from_processing_kwargs(kwargs_cls)

        # No usable annotation: look for *ProcessingKwargs in the module
        # that defines the processor type.
        module = importlib.import_module(processor_type.__module__)
        found: set[str] = set()
        for attr_name, attr in vars(module).items():
            if attr_name.endswith("ProcessingKwargs"):
                found |= _collect_dynamic_keys_from_processing_kwargs(attr)
        return found
    except Exception:
        return set()

get_video_processor

get_video_processor(
    processor_name: str,
    *args: Any,
    revision: str | None = None,
    trust_remote_code: bool = False,
    processor_cls_overrides: type[_V] | None = None,
    **kwargs: Any,
)

Load a video processor for the given model name via HuggingFace.

Source code in vllm/transformers_utils/processor.py
def get_video_processor(
    processor_name: str,
    *args: Any,
    revision: str | None = None,
    trust_remote_code: bool = False,
    processor_cls_overrides: type[_V] | None = None,
    **kwargs: Any,
):
    """Load a video processor for the given model name via HuggingFace.

    ``processor_name`` is first passed through ``convert_model_repo_to_path``.
    The processor is loaded with ``from_pretrained`` of
    ``processor_cls_overrides`` when that is given (truthy), and of
    ``AutoVideoProcessor`` otherwise.

    Raises:
        RuntimeError: If loading raises ``ValueError`` while
            ``trust_remote_code`` is false; the message suggests enabling it.
        ValueError: Re-raised unchanged when ``trust_remote_code`` is true.
    """
    try:
        processor_name = convert_model_repo_to_path(processor_name)
        if processor_cls_overrides:
            loader_cls = processor_cls_overrides
        else:
            loader_cls = AutoVideoProcessor
        loaded = loader_cls.from_pretrained(
            processor_name,
            *args,
            revision=revision,
            trust_remote_code=trust_remote_code,
            **kwargs,
        )
    except ValueError as exc:
        if trust_remote_code:
            raise
        # The ValueError may mean the processor class does not exist or is
        # not importable, so point the user at the --trust-remote-code flag.
        raise RuntimeError(
            "Failed to load the video processor. If the video processor is "
            "a custom processor not yet available in the HuggingFace "
            "transformers library, consider setting "
            "`trust_remote_code=True` in LLM or using the "
            "`--trust-remote-code` flag in the CLI."
        ) from exc

    return cast(BaseVideoProcessor, loaded)