Wrappers

Reference information for the language model Wrappers API.

eva.language.models.wrappers.HuggingFaceModel

Bases: LanguageModel

Wrapper class for loading HuggingFace transformers models.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| model_name_or_path | str | The model name or path to load the model from. This can be a local path or a model name from the HuggingFace model hub. | required |
| model_class | str | The class of the model to use (e.g., "AutoModelForCausalLM"). | required |
| model_kwargs | Dict[str, Any] \| None | Additional arguments for configuring the model. | None |
| system_prompt | str \| None | System prompt to use. | None |
| processor_kwargs | Dict[str, Any] \| None | Additional processor/tokenizer arguments. | None |
| generation_kwargs | Dict[str, Any] \| None | Additional generation parameters (temperature, max_length, etc.). | None |
| chat_template | str \| None | Optional chat template name to use with the processor. If None, uses the template stored in the checkpoint's processor config. | None |
Source code in src/eva/language/models/wrappers/huggingface.py
def __init__(
    self,
    model_name_or_path: str,
    model_class: str,
    model_kwargs: Dict[str, Any] | None = None,
    system_prompt: str | None = None,
    processor_kwargs: Dict[str, Any] | None = None,
    generation_kwargs: Dict[str, Any] | None = None,
    chat_template: str | None = None,
) -> None:
    """Initializes the model.

    Args:
        model_name_or_path: The model name or path to load the model from.
            This can be a local path or a model name from the `HuggingFace`
            model hub.
        model_class: The class of the model to use (e.g., "AutoModelForCausalLM").
        model_kwargs: Additional arguments for configuring the model.
        system_prompt: System prompt to use.
        processor_kwargs: Additional processor/tokenizer arguments.
        generation_kwargs: Additional generation parameters (temperature, max_length, etc.).
        chat_template: Optional chat template name to use with the processor. If None,
            will use the template stored in the checkpoint's processor config.
    """
    super().__init__(system_prompt=system_prompt)

    self.model_name_or_path = model_name_or_path
    self.model_class = model_class
    self.model_kwargs = model_kwargs or {}
    self.processor_kwargs = processor_kwargs or {}
    self.generation_kwargs = self._default_generation_kwargs | (generation_kwargs or {})
    self.chat_template = chat_template

    self.model: nn.Module
    self.processor: Callable
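
A minimal usage sketch. The checkpoint name, prompt, and generation settings below are illustrative assumptions, not defaults of the wrapper:

```python
from eva.language.models.wrappers import HuggingFaceModel

# Hypothetical checkpoint and settings, shown only to illustrate the constructor arguments.
model = HuggingFaceModel(
    model_name_or_path="Qwen/Qwen2.5-0.5B-Instruct",  # any causal-LM checkpoint
    model_class="AutoModelForCausalLM",
    system_prompt="You are a concise assistant.",
    generation_kwargs={"max_new_tokens": 64, "do_sample": False},
)
model.configure_model()  # lazily loads the weights and the processor/tokenizer
```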

configure_model

Use configure_model hook to load model in lazy fashion.

Source code in src/eva/language/models/wrappers/huggingface.py
def configure_model(self) -> None:
    """Use configure_model hook to load model in lazy fashion."""
    logger.info(f"Configuring model: {self.model_name_or_path}")
    if not hasattr(self, "model"):
        self.model = self.load_model()
    if not hasattr(self, "processor"):
        self.processor = self.load_processor()

load_model

Loads the model from HuggingFace.

Raises:

| Type | Description |
| --- | --- |
| ValueError | If the model class is not found in transformers or if the model does not support generation. |

Source code in src/eva/language/models/wrappers/huggingface.py
@override
def load_model(self) -> nn.Module:
    """Loads the model from HuggingFace.

    Raises:
        ValueError: If the model class is not found in transformers or if the model
            does not support generation.
    """
    import transformers  # Reimport here, in case module was modified at runtime by user

    if hasattr(transformers, self.model_class):
        model_class = getattr(transformers, self.model_class)
    else:
        raise ValueError(f"Model class {self.model_class} not found in transformers")

    model = model_class.from_pretrained(self.model_name_or_path, **self.model_kwargs)

    if not hasattr(model, "generate"):
        raise ValueError(f"Model {self.model_name_or_path} does not support generation.")

    return model

load_processor

Initialize the processor.

Note: For text-only models, AutoProcessor returns the tokenizer.

Source code in src/eva/language/models/wrappers/huggingface.py
def load_processor(self) -> Callable:
    """Initialize the processor.

    Note: For text-only models, AutoProcessor returns the tokenizer.
    """
    processor = transformers.AutoProcessor.from_pretrained(
        self.processor_kwargs.pop("model_name_or_path", self.model_name_or_path),
        **self.processor_kwargs,
    )
    if self.chat_template is not None:
        processor.chat_template = self.chat_template  # type: ignore
    # To ensure correct generation with batched inputs of different lengths
    if "CausalLM" in self.model_class or "ConditionalGeneration" in self.model_class:
        processor.padding_side = "left"
    # Some older models don't have a padding token by default
    if hasattr(processor, "pad_token") and processor.pad_token is None:
        processor.pad_token = processor.eos_token
    return processor
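
For reference, the same defaults can be reproduced with a plain transformers tokenizer; this is only a sketch of the behaviour described above (the checkpoint is an example), not part of the wrapper's API:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # example checkpoint without a pad token
tokenizer.padding_side = "left"                    # needed for batched generation with unequal lengths
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token      # fall back to EOS as padding, as the wrapper does
```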

format_inputs

Formats inputs for HuggingFace models.

Note: If multiple system messages are present, they will be combined into a single message, given that many models only support a single system prompt.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| batch | TextBatch | A batch of text inputs. | required |

Returns:

| Type | Description |
| --- | --- |
| Dict[str, Tensor] | A dictionary produced by the tokenizer, e.g. {"input_ids": ..., "attention_mask": ...}. |

Source code in src/eva/language/models/wrappers/huggingface.py
@override
def format_inputs(self, batch: TextBatch) -> Dict[str, torch.Tensor]:
    """Formats inputs for HuggingFace models.

    Note: If multiple system messages are present, they will be combined
    into a single message, given that many models only support a single
    system prompt.

    Args:
        batch: A batch of text inputs.

    Returns:
        A dictionary produced by the tokenizer following a format like:
        {
            "input_ids": ...,
            "attention_mask": ...,
        }
    """
    message_batch, _, _ = TextBatch(*batch)
    message_batch = message_utils.batch_insert_system_message(
        message_batch, self.system_message
    )
    message_batch = list(map(message_utils.combine_system_messages, message_batch))

    if self.processor.chat_template is not None:  # type: ignore
        templated_text = [
            self.processor.apply_chat_template(  # type: ignore
                message_utils.format_chat_message(message),
                add_generation_prompt=True,
                tokenize=False,
            )
            for message in message_batch
        ]
    else:
        templated_text = list(map(message_utils.merge_message_contents, message_batch))

    processor_inputs = {
        "text": templated_text,
        "return_tensors": "pt",
        "padding": True,
        **self.processor_kwargs,
    }

    return self.processor(**processor_inputs).to(self.model.device)  # type: ignore

model_forward

Generates text using the model.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| batch | Dict[str, Tensor] | A dictionary containing the tokenized input data. | required |

Returns:

| Type | Description |
| --- | --- |
| ModelOutput | The model output containing generated text. |

Source code in src/eva/language/models/wrappers/huggingface.py
@override
def model_forward(self, batch: Dict[str, torch.Tensor]) -> ModelOutput:
    """Generates text using the model.

    Args:
        batch: A dictionary containing the tokenized input data.

    Returns:
        The model output containing generated text.
    """
    output_ids = self.model.generate(**batch, **self.generation_kwargs)  # type: ignore
    decoded_input, decoded_output = self._decode_ids(output_ids, batch["input_ids"].shape[-1])

    return ModelOutput(
        generated_text=decoded_output,
        input_text=decoded_input,
        output_ids=output_ids,
        attention_mask=batch.get("attention_mask"),
    )
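
Together, format_inputs and model_forward mirror the standard transformers chat workflow. A rough standalone sketch, assuming a hypothetical chat checkpoint and prompt:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

name = "Qwen/Qwen2.5-0.5B-Instruct"  # illustrative chat checkpoint
tokenizer = AutoTokenizer.from_pretrained(name)
model = AutoModelForCausalLM.from_pretrained(name)

messages = [{"role": "user", "content": "Name one organ of the human body."}]
text = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
inputs = tokenizer([text], return_tensors="pt", padding=True).to(model.device)

output_ids = model.generate(**inputs, max_new_tokens=32)
# Strip the prompt tokens so only the newly generated text is decoded.
new_tokens = output_ids[:, inputs["input_ids"].shape[-1]:]
print(tokenizer.batch_decode(new_tokens, skip_special_tokens=True)[0])
```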

eva.language.models.wrappers.LiteLLMModel

Bases: LanguageModel

Wrapper class for LiteLLM language models.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| model_name | str | The name of the model to use. | required |
| model_kwargs | Dict[str, Any] \| None | Additional keyword arguments to pass during generation (e.g., temperature, max_tokens). | None |
| system_prompt | str \| None | The system prompt to use (optional). | None |
| log_level | int \| None | Optional logging level for LiteLLM. | INFO |
Source code in src/eva/language/models/wrappers/litellm.py
def __init__(
    self,
    model_name: str,
    model_kwargs: Dict[str, Any] | None = None,
    system_prompt: str | None = None,
    log_level: int | None = logging.INFO,
):
    """Initialize the LiteLLM Wrapper.

    Args:
        model_name: The name of the model to use.
        model_kwargs: Additional keyword arguments to pass during
            generation (e.g., `temperature`, `max_tokens`).
        system_prompt: The system prompt to use (optional).
        log_level: Optional logging level for LiteLLM. Defaults to `logging.INFO`.
    """
    super().__init__(system_prompt=system_prompt)

    self.model_name = model_name
    self.model_kwargs = self._default_model_kwargs | (model_kwargs or {})

    litellm.suppress_debug_info = True
    litellm.drop_params = True

    if log_level is not None:
        logging.getLogger("LiteLLM").setLevel(log_level)
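
A minimal usage sketch. The provider/model string and sampling settings are illustrative, and the corresponding provider API key must be configured for LiteLLM:

```python
from eva.language.models.wrappers import LiteLLMModel

# Hypothetical model identifier; any LiteLLM-supported provider/model string works the same way.
model = LiteLLMModel(
    model_name="openai/gpt-4o-mini",
    model_kwargs={"temperature": 0.0, "max_tokens": 256},
    system_prompt="Answer with a single word.",
)
```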

format_inputs

Formats inputs for LiteLLM.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| batch | TextBatch | A batch of text inputs. | required |

Returns:

| Type | Description |
| --- | --- |
| List[List[Dict[str, Any]]] | A list of chat message lists, each in the format [{"role": ..., "content": ...}, ...]. |

Source code in src/eva/language/models/wrappers/litellm.py
@override
def format_inputs(self, batch: TextBatch) -> List[List[Dict[str, Any]]]:
    """Formats inputs for LiteLLM.

    Args:
        batch: A batch of text inputs.

    Returns:
        A list of messages in the following format:
        [
            {
                "role": ...
                "content": ...
            },
            ...
        ]
    """
    message_batch, _, _ = TextBatch(*batch)

    message_batch = message_utils.batch_insert_system_message(
        message_batch, self.system_message
    )
    message_batch = list(map(message_utils.combine_system_messages, message_batch))

    return list(map(message_utils.format_chat_message, message_batch))
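
For example, with a system prompt set, a single-item batch would come out roughly as follows (the message contents are illustrative):

```python
[
    [
        {"role": "system", "content": "Answer with a single word."},
        {"role": "user", "content": "What is the capital of France?"},
    ]
]
```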

model_forward

Generates output text through API calls via LiteLLM's batch completion functionality.

Source code in src/eva/language/models/wrappers/litellm.py
@override
@backoff.on_exception(
    backoff.expo,
    RETRYABLE_ERRORS,
    max_tries=20,
    jitter=backoff.full_jitter,
    on_backoff=lambda details: logger.warning(
        f"Retrying due to {details.get('exception') or 'Unknown error'}"
    ),
)
def model_forward(self, batch: List[List[Dict[str, Any]]]) -> ModelOutput:
    """Generates output text through API calls via LiteLLM's batch completion functionality."""
    outputs = batch_completion(model=self.model_name, messages=batch, **self.model_kwargs)
    self._raise_exceptions(outputs)

    generated_text = [
        output["choices"][0]["message"]["content"]
        for output in outputs
        if output["choices"][0]["message"]["role"] == "assistant"
    ]
    input_text = [
        message_utils.stringify_messages(messages, include_roles=True) for messages in batch
    ]
    return ModelOutput(generated_text=generated_text, input_text=input_text)

eva.language.models.wrappers.VllmModel

Bases: LanguageModel

Wrapper class for using vLLM for text generation.

This wrapper loads a vLLM model, sets up the tokenizer and sampling parameters, and uses a chat template to format inputs for generation.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| model_name_or_path | str | The model identifier (e.g., a HuggingFace repo ID or local path). Note that the model must be compatible with vLLM. | required |
| model_kwargs | Dict[str, Any] \| None | Arguments required to initialize the vLLM model; see https://github.com/vllm-project/vllm/blob/main/vllm/entrypoints/llm.py for more information. | None |
| system_prompt | str \| None | System prompt to use. | None |
| generation_kwargs | Dict[str, Any] \| None | Arguments required to generate the output. See vllm.SamplingParams (https://github.com/vllm-project/vllm/blob/main/vllm/sampling_params.py). | None |
| chat_template | str \| None | Optional chat template name to use with the tokenizer. If None, uses the template stored in the checkpoint's tokenizer config. | None |
Source code in src/eva/language/models/wrappers/vllm.py
def __init__(
    self,
    model_name_or_path: str,
    model_kwargs: Dict[str, Any] | None = None,
    system_prompt: str | None = None,
    generation_kwargs: Dict[str, Any] | None = None,
    chat_template: str | None = None,
) -> None:
    """Initializes the vLLM model wrapper.

    Args:
        model_name_or_path: The model identifier (e.g., a HuggingFace repo ID or local path).
            Note that the model must be compatible with vLLM.
        model_kwargs: Arguments required to initialize the vLLM model,
            see [link](https://github.com/vllm-project/vllm/blob/main/vllm/entrypoints/llm.py)
            for more information.
        system_prompt: System prompt to use.
        generation_kwargs: Arguments required to generate the output.
            See [vllm.SamplingParams](https://github.com/vllm-project/vllm/blob/main/vllm/sampling_params.py).
        chat_template: Optional chat template name to use with the tokenizer. If None,
            will use the template stored in the checkpoint's tokenizer config.
    """
    super().__init__(system_prompt=system_prompt)
    self.model_name_or_path = model_name_or_path
    self.model_kwargs = self._default_model_kwargs | (model_kwargs or {})
    self.generation_kwargs = self._default_generation_kwargs | (generation_kwargs or {})
    self.chat_template = chat_template

    self.model: LLM
    self.tokenizer: AutoTokenizer
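
A minimal usage sketch. The checkpoint and settings are illustrative; model_kwargs are forwarded to vllm.LLM and generation_kwargs to vllm.SamplingParams:

```python
from eva.language.models.wrappers import VllmModel

# Hypothetical checkpoint; must be a chat model supported by vLLM.
model = VllmModel(
    model_name_or_path="Qwen/Qwen2.5-0.5B-Instruct",
    model_kwargs={"tensor_parallel_size": 1, "gpu_memory_utilization": 0.8},
    generation_kwargs={"temperature": 0.0, "max_tokens": 128},
)
model.configure_model()  # lazily instantiates the vLLM engine and tokenizer
```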

configure_model

Use configure_model hook to load model in lazy fashion.

Source code in src/eva/language/models/wrappers/vllm.py
def configure_model(self) -> None:
    """Use configure_model hook to load model in lazy fashion."""
    if not hasattr(self, "model"):
        self.model = self.load_model()
    if not hasattr(self, "tokenizer"):
        self.tokenizer = self.load_tokenizer()

load_model

Loads the vLLM model.

Source code in src/eva/language/models/wrappers/vllm.py
@override
def load_model(self) -> LLM:
    """Loads the vLLM model."""
    logger.info(f"Loading model with kwargs: {self.model_kwargs}")
    logger.info(f"CUDA_VISIBLE_DEVICES: {os.environ.get('CUDA_VISIBLE_DEVICES')}")
    return LLM(model=self.model_name_or_path, **self.model_kwargs)

load_tokenizer

Loads the tokenizer.

Raises:

| Type | Description |
| --- | --- |
| NotImplementedError | If the tokenizer does not have a chat template. |

Source code in src/eva/language/models/wrappers/vllm.py
def load_tokenizer(self) -> AutoTokenizer:
    """Loads the tokenizer.

    Raises:
        NotImplementedError: If the tokenizer does not have a chat template.
    """
    tokenizer = AutoTokenizer.from_pretrained(self.model_name_or_path, trust_remote_code=True)
    if self.chat_template is not None:
        tokenizer.chat_template = self.chat_template  # type: ignore
    if not hasattr(tokenizer, "chat_template") or tokenizer.chat_template is None:
        raise NotImplementedError("Currently only chat models are supported.")
    return tokenizer

format_inputs

Formats inputs for vLLM models.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| batch | TextBatch | A batch of text inputs. | required |

Returns:

| Type | Description |
| --- | --- |
| List[Dict[str, Any]] | A list of input dictionaries with a "prompt" key. |

Source code in src/eva/language/models/wrappers/vllm.py
@override
def format_inputs(self, batch: TextBatch) -> List[Dict[str, Any]]:
    """Formats inputs for vLLM models.

    Args:
        batch: A batch of text inputs.

    Returns:
        A list of input dictionaries with "prompt" key.
    """
    message_batch, _, _ = TextBatch(*batch)
    message_batch = message_utils.batch_insert_system_message(
        message_batch, self.system_message
    )
    message_batch = list(map(message_utils.combine_system_messages, message_batch))

    input_dicts = []
    for messages in message_batch:
        formatted_messages = message_utils.format_chat_message(messages)
        templated_messages = self.tokenizer.apply_chat_template(  # type: ignore
            formatted_messages,
            tokenize=False,
            add_generation_prompt=True,
        )
        input_dicts.append({"prompt": templated_messages})

    return input_dicts

model_forward

Generates text using the vLLM model.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| batch | List[Dict[str, Any]] | A list of input dictionaries containing a "prompt" key (output of format_inputs). | required |

Returns:

| Type | Description |
| --- | --- |
| ModelOutput | ModelOutput containing the generated text responses. |

Source code in src/eva/language/models/wrappers/vllm.py
@override
def model_forward(self, batch: List[Dict[str, Any]]) -> ModelOutput:
    """Generates text using the vLLM model.

    Args:
        batch: A list of input dictionaries containing "prompt" key
            (output of `format_inputs`).

    Returns:
        ModelOutput containing the generated text responses.
    """
    outputs = self.model.generate(
        batch, sampling_params=SamplingParams(**self.generation_kwargs)
    )
    output_texts = [output.outputs[0].text for output in outputs]

    return ModelOutput(generated_text=output_texts)
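
For reference, the underlying vLLM call pattern (chat template applied, then prompt dicts passed to generate) looks roughly like this standalone sketch; the checkpoint and prompt are illustrative:

```python
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams

name = "Qwen/Qwen2.5-0.5B-Instruct"  # hypothetical chat checkpoint
tokenizer = AutoTokenizer.from_pretrained(name)
llm = LLM(model=name)

messages = [{"role": "user", "content": "Name one organ of the human body."}]
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

outputs = llm.generate(
    [{"prompt": prompt}],
    sampling_params=SamplingParams(temperature=0.0, max_tokens=32),
)
print(outputs[0].outputs[0].text)
```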