Wrappers

Reference information for the language model Wrappers API.

eva.language.models.wrappers.HuggingFaceModel

Bases: LanguageModel

Wrapper class for loading HuggingFace transformers models using pipelines.

Parameters:

- model_name_or_path (str, required): The model name or path to load the model from. This can be a local path or a model name from the HuggingFace model hub.
- task (Literal['text-generation'], default: 'text-generation'): The pipeline task. Defaults to "text-generation".
- model_kwargs (Dict[str, Any] | None, default: None): Additional arguments for configuring the pipeline.
- system_prompt (str | None, default: None): System prompt to use.
- generation_kwargs (Dict[str, Any] | None, default: None): Additional generation parameters (temperature, max_length, etc.).
- chat_mode (bool, default: True): Whether the specified model expects chat-style messages. If set to False, the model is assumed to be a standard text-completion model and will expect plain text string inputs.
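
A minimal instantiation sketch, assuming the import path shown in the heading above; the checkpoint name is a placeholder for any text-generation model on the HuggingFace Hub:

from eva.language.models.wrappers import HuggingFaceModel

model = HuggingFaceModel(
    model_name_or_path="Qwen/Qwen2.5-0.5B-Instruct",  # placeholder Hub checkpoint
    model_kwargs={"device_map": "auto"},  # forwarded to transformers.pipeline
    system_prompt="You are a concise assistant.",
    generation_kwargs={"max_new_tokens": 128, "temperature": 0.7},
)
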
Source code in src/eva/language/models/wrappers/huggingface.py
def __init__(
    self,
    model_name_or_path: str,
    task: Literal["text-generation"] = "text-generation",
    model_kwargs: Dict[str, Any] | None = None,
    system_prompt: str | None = None,
    generation_kwargs: Dict[str, Any] | None = None,
    chat_mode: bool = True,
) -> None:
    """Initializes the model.

    Args:
        model_name_or_path: The model name or path to load the model from.
            This can be a local path or a model name from the `HuggingFace`
            model hub.
        task: The pipeline task. Defaults to "text-generation".
        model_kwargs: Additional arguments for configuring the pipeline.
        system_prompt: System prompt to use.
        generation_kwargs: Additional generation parameters (temperature, max_length, etc.).
        chat_mode: Whether the specified model expects chat style messages. If set to False
            the model is assumed to be a standard text completion model and will expect
            plain text string inputs.
    """
    super().__init__(system_prompt=system_prompt)

    self._model_name_or_path = model_name_or_path
    self._task = task
    self._model_kwargs = model_kwargs or {}
    self._generation_kwargs = self._default_generation_kwargs | (generation_kwargs or {})
    self._chat_mode = chat_mode

    self.model = self.load_model()

load_model

Loads the model as a Hugging Face pipeline.

Source code in src/eva/language/models/wrappers/huggingface.py
@override
def load_model(self) -> Callable:
    """Loads the model as a Hugging Face pipeline."""
    return pipeline(
        task=self._task,
        model=self._model_name_or_path,
        trust_remote_code=True,
        **self._model_kwargs,
    )

format_inputs

Formats inputs for HuggingFace models.

Note: If multiple system messages are present, they will be combined into a single message, given that many models only support a single system prompt.

Parameters:

- batch (TextBatch, required): A batch of text and image inputs.

Returns:

- List[List[Dict[str, Any]]] | List[str]: When in chat mode, returns a batch of message series following OpenAI's API format ({"role": "user", "content": "..."}); for non-chat models, returns a list of plain text strings.
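
For illustration, the two possible shapes of the returned batch; the values and the exact merging of the system prompt into the plain string are placeholders:

# chat_mode=True: one OpenAI-style message list per batch element
[[{"role": "system", "content": "You are a concise assistant."},
  {"role": "user", "content": "What is eva?"}]]

# chat_mode=False: one plain prompt string per batch element
["You are a concise assistant.\n\nWhat is eva?"]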

Source code in src/eva/language/models/wrappers/huggingface.py
@override
def format_inputs(self, batch: TextBatch) -> List[List[Dict[str, Any]]] | List[str]:
    """Formats inputs for HuggingFace models.

    Note: If multiple system messages are present, they will be combined
    into a single message, given that many models only support a single
    system prompt.

    Args:
        batch: A batch of text and image inputs.

    Returns:
        When in chat mode, returns a batch of message series following
        OpenAI's API format {"role": "user", "content": "..."}, for non-chat
        models returns a list of plain text strings.
    """
    message_batch, _, _ = TextBatch(*batch)
    message_batch = message_utils.batch_insert_system_message(
        message_batch, self.system_message
    )
    message_batch = list(map(message_utils.combine_system_messages, message_batch))

    if self._chat_mode:
        return list(map(message_utils.format_chat_message, message_batch))
    else:
        return list(map(message_utils.merge_message_contents, message_batch))

model_forward

Generates text using the pipeline.

Parameters:

- prompts (List[str], required): The input prompts for the model.

Returns:

- ModelOutput: The generated text as a string.

Source code in src/eva/language/models/wrappers/huggingface.py
@override
def model_forward(self, prompts: List[str]) -> ModelOutput:
    """Generates text using the pipeline.

    Args:
        prompts: The input prompts for the model.

    Returns:
        The generated text as a string.
    """
    outputs = self.model(prompts, return_full_text=False, **self._generation_kwargs)
    if outputs is None:
        raise ValueError("Outputs from the model are None.")

    results = []
    for output in outputs:
        if isinstance(output, list):
            results.append(output[0]["generated_text"])  # type: ignore
        else:
            results.append(output["generated_text"])  # type: ignore

    return ModelOutput(generated_text=results)
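
Continuing the instantiation sketch above: with the default chat_mode the inputs would normally come from format_inputs, but Hugging Face text-generation pipelines also accept plain prompt strings:

output = model.model_forward(["Once upon a time"])  # returns a ModelOutput with one generated string per prompt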

eva.language.models.wrappers.LiteLLMModel

Bases: LanguageModel

Wrapper class for LiteLLM language models.

Parameters:

- model_name (str, required): The name of the model to use.
- model_kwargs (Dict[str, Any] | None, default: None): Additional keyword arguments to pass during generation (e.g., temperature, max_tokens).
- system_prompt (str | None, default: None): The system prompt to use (optional).
- log_level (int | None, default: logging.INFO): Optional logging level for LiteLLM.
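
A minimal instantiation sketch; the model string is a hypothetical example of a provider/model identifier that LiteLLM can route:

import logging

from eva.language.models.wrappers import LiteLLMModel

model = LiteLLMModel(
    model_name="openai/gpt-4o-mini",  # hypothetical provider/model string
    model_kwargs={"temperature": 0.0, "max_tokens": 256},
    log_level=logging.WARNING,
)
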
Source code in src/eva/language/models/wrappers/litellm.py
def __init__(
    self,
    model_name: str,
    model_kwargs: Dict[str, Any] | None = None,
    system_prompt: str | None = None,
    log_level: int | None = logging.INFO,
):
    """Initialize the LiteLLM Wrapper.

    Args:
        model_name: The name of the model to use.
        model_kwargs: Additional keyword arguments to pass during
            generation (e.g., `temperature`, `max_tokens`).
        system_prompt: The system prompt to use (optional).
        log_level: Optional logging level for LiteLLM. Defaults to WARNING.
    """
    super().__init__(system_prompt=system_prompt)

    self.model_name = model_name
    self.model_kwargs = self._default_model_kwargs | (model_kwargs or {})

    litellm.suppress_debug_info = True
    litellm.drop_params = True

    if log_level is not None:
        logging.getLogger("LiteLLM").setLevel(log_level)

format_inputs

Formats inputs for LiteLLM.

Parameters:

- batch (TextBatch, required): A batch of text inputs.

Returns:

- List[List[Dict[str, Any]]]: A list of messages in the format [{"role": ..., "content": ...}, ...].

Source code in src/eva/language/models/wrappers/litellm.py
@override
def format_inputs(self, batch: TextBatch) -> List[List[Dict[str, Any]]]:
    """Formats inputs for LiteLLM.

    Args:
        batch: A batch of text inputs.

    Returns:
        A list of messages in the following format:
        [
            {
                "role": ...
                "content": ...
            },
            ...
        ]
    """
    message_batch, _, _ = TextBatch(*batch)

    message_batch = message_utils.batch_insert_system_message(
        message_batch, self.system_message
    )
    message_batch = list(map(message_utils.combine_system_messages, message_batch))

    return list(map(message_utils.format_chat_message, message_batch))

model_forward

Generates output text through API calls via LiteLLM's batch completion functionality.

Source code in src/eva/language/models/wrappers/litellm.py
@override
@backoff.on_exception(
    backoff.expo,
    RETRYABLE_ERRORS,
    max_tries=20,
    jitter=backoff.full_jitter,
    on_backoff=lambda details: logger.warning(
        f"Retrying due to {details.get('exception') or 'Unknown error'}"
    ),
)
def model_forward(self, batch: List[List[Dict[str, Any]]]) -> ModelOutput:
    """Generates output text through API calls via LiteLLM's batch completion functionality."""
    outputs = batch_completion(model=self.model_name, messages=batch, **self.model_kwargs)
    self._raise_exceptions(outputs)

    generated_text = [
        output["choices"][0]["message"]["content"]
        for output in outputs
        if output["choices"][0]["message"]["role"] == "assistant"
    ]
    input_text = [
        message_utils.stringify_messages(messages, include_roles=True) for messages in batch
    ]
    return ModelOutput(generated_text=generated_text, input_text=input_text)
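
An illustrative call continuing the sketch above, with a single-element batch in OpenAI message format (the content is a placeholder):

messages = [[{"role": "user", "content": "Summarize the purpose of eva in one sentence."}]]
output = model.model_forward(messages)  # ModelOutput with one generated string per conversation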

eva.language.models.wrappers.VllmModel

Bases: LanguageModel

Wrapper class for using vLLM for text generation.

This wrapper loads a vLLM model, sets up the tokenizer and sampling parameters, and uses a chat template to format inputs for generation.

Parameters:

- model_name_or_path (str, required): The model identifier (e.g., a HuggingFace repo ID or local path). Note that the model must be compatible with vLLM.
- model_kwargs (Dict[str, Any] | None, default: None): Arguments required to initialize the vLLM model; see https://github.com/vllm-project/vllm/blob/main/vllm/entrypoints/llm.py for more information.
- system_prompt (str | None, default: None): System prompt to use.
- generation_kwargs (Dict[str, Any] | None, default: None): Arguments required to generate the output. See vllm.SamplingParams (https://github.com/vllm-project/vllm/blob/main/vllm/sampling_params.py).
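
A minimal instantiation sketch, assuming a vLLM-compatible chat checkpoint (the name is a placeholder); configure_model (documented below) loads the model and tokenizer lazily:

from eva.language.models.wrappers import VllmModel

model = VllmModel(
    model_name_or_path="Qwen/Qwen2.5-0.5B-Instruct",  # placeholder vLLM-compatible checkpoint
    model_kwargs={"tensor_parallel_size": 1},
    generation_kwargs={"temperature": 0.0, "max_tokens": 256},
)
model.configure_model()  # loads the LLM and tokenizer lazily
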
Source code in src/eva/language/models/wrappers/vllm.py
def __init__(
    self,
    model_name_or_path: str,
    model_kwargs: Dict[str, Any] | None = None,
    system_prompt: str | None = None,
    generation_kwargs: Dict[str, Any] | None = None,
) -> None:
    """Initializes the vLLM model wrapper.

    Args:
        model_name_or_path: The model identifier - e.g., a HuggingFace repo ID or local path).
            Note that the model must be compatible with vLLM.
        model_kwargs: Arguments required to initialize the vLLM model,
            see [link](https://github.com/vllm-project/vllm/blob/main/vllm/entrypoints/llm.py)
            for more information.
        system_prompt: System prompt to use.
        generation_kwargs: Arguments required to generate the output.
            See [vllm.SamplingParams](https://github.com/vllm-project/vllm/blob/main/vllm/sampling_params.py).
    """
    super().__init__(system_prompt=system_prompt)
    self.model_name_or_path = model_name_or_path
    self.model_kwargs = self._default_model_kwargs | (model_kwargs or {})
    self.generation_kwargs = self._default_generation_kwargs | (generation_kwargs or {})

    self.model: LLM
    self.tokenizer: AutoTokenizer

configure_model

Use configure_model hook to load model in lazy fashion.

Source code in src/eva/language/models/wrappers/vllm.py
def configure_model(self) -> None:
    """Use configure_model hook to load model in lazy fashion."""
    if not hasattr(self, "model"):
        self.model = self.load_model()
    if not hasattr(self, "tokenizer"):
        self.tokenizer = self.load_tokenizer()

load_model

Loads the vLLM model.

Source code in src/eva/language/models/wrappers/vllm.py
@override
def load_model(self) -> LLM:
    """Loads the vLLM model."""
    logger.info(f"Loading model with kwargs: {self.model_kwargs}")
    logger.info(f"CUDA_VISIBLE_DEVICES: {os.environ.get('CUDA_VISIBLE_DEVICES')}")
    return LLM(model=self.model_name_or_path, **self.model_kwargs)

load_tokenizer

Loads the tokenizer.

Raises:

- NotImplementedError: If the tokenizer does not have a chat template.

Source code in src/eva/language/models/wrappers/vllm.py
def load_tokenizer(self) -> AutoTokenizer:
    """Loads the tokenizer.

    Raises:
        NotImplementedError: If the tokenizer does not have a chat template.
    """
    tokenizer = AutoTokenizer.from_pretrained(self.model_name_or_path, trust_remote_code=True)
    if not hasattr(tokenizer, "chat_template") or tokenizer.chat_template is None:
        raise NotImplementedError("Currently only chat models are supported.")
    return tokenizer

format_inputs

Formats inputs for vLLM models.

Parameters:

- batch (TextBatch, required): A batch of text inputs.

Returns:

- List[Dict[str, Any]]: A list of input dictionaries with "prompt" key.

Source code in src/eva/language/models/wrappers/vllm.py
@override
def format_inputs(self, batch: TextBatch) -> List[Dict[str, Any]]:
    """Formats inputs for vLLM models.

    Args:
        batch: A batch of text inputs.

    Returns:
        A list of input dictionaries with "prompt" key.
    """
    message_batch, _, _ = TextBatch(*batch)
    message_batch = message_utils.batch_insert_system_message(
        message_batch, self.system_message
    )
    message_batch = list(map(message_utils.combine_system_messages, message_batch))

    input_dicts = []
    for messages in message_batch:
        formatted_messages = message_utils.format_chat_message(messages)
        templated_messages = self.tokenizer.apply_chat_template(  # type: ignore
            formatted_messages,
            tokenize=False,
            add_generation_prompt=True,
        )
        input_dicts.append({"prompt": templated_messages})

    return input_dicts

model_forward

Generates text using the vLLM model.

Parameters:

- batch (List[Dict[str, Any]], required): A list of input dictionaries containing "prompt" key (output of format_inputs).

Returns:

- ModelOutput: ModelOutput containing the generated text responses.

Source code in src/eva/language/models/wrappers/vllm.py
@override
def model_forward(self, batch: List[Dict[str, Any]]) -> ModelOutput:
    """Generates text using the vLLM model.

    Args:
        batch: A list of input dictionaries containing "prompt" key
            (output of `format_inputs`).

    Returns:
        ModelOutput containing the generated text responses.
    """
    outputs = self.model.generate(
        batch, sampling_params=SamplingParams(**self.generation_kwargs)
    )
    output_texts = [output.outputs[0].text for output in outputs]

    return ModelOutput(generated_text=output_texts)
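
An illustrative call continuing the sketch above; in practice the prompt dictionaries come from format_inputs, which applies the tokenizer's chat template, but a pre-templated string can be passed directly:

output = model.model_forward([{"prompt": "Question: What does eva evaluate? Answer:"}])
# the returned ModelOutput carries one generated string per prompt dictionary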