|
18 | 18 | from vllm.model_executor.layers.linear import (ColumnParallelLinear,
|
19 | 19 | RowParallelLinear)
|
20 | 20 | from vllm.model_executor.layers.quantization import QuantizationConfig
|
| 21 | +from vllm.model_executor.models.module_mapping import MultiModelKeys |
21 | 22 | from vllm.model_executor.sampling_metadata import SamplingMetadata
|
22 | 23 | from vllm.multimodal import MULTIMODAL_REGISTRY
|
23 | 24 | from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig,
|
|
31 | 32 | from vllm.multimodal.profiling import BaseDummyInputsBuilder
|
32 | 33 | from vllm.sequence import IntermediateTensors
|
33 | 34 |
|
34 |
| -from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsPP |
| 35 | +from .interfaces import (MultiModalEmbeddings, SupportsLoRA, |
| 36 | + SupportsMultiModal, SupportsPP) |
35 | 37 | from .pixtral import PixtralHFEncoderInfo, PixtralHFVisionModel
|
36 | 38 | from .utils import (AutoWeightsLoader, flatten_bn, init_vllm_registered_model,
|
37 | 39 | maybe_prefix, merge_multimodal_embeddings)
|
@@ -382,8 +384,8 @@ def init_vision_tower_for_llava(
|
382 | 384 | _build_mistral3_processor,
|
383 | 385 | info=_build_mistral3_info,
|
384 | 386 | dummy_inputs=Mistral3DummyInputsBuilder)
|
385 |
| -class Mistral3ForConditionalGeneration(nn.Module, SupportsMultiModal, |
386 |
| - SupportsPP): |
| 387 | +class Mistral3ForConditionalGeneration(nn.Module, SupportsLoRA, |
| 388 | + SupportsMultiModal, SupportsPP): |
387 | 389 |
|
388 | 390 | packed_modules_mapping = {
|
389 | 391 | "qkv_proj": ["q_proj", "k_proj", "v_proj"],
|
@@ -594,3 +596,12 @@ def load_weights(self, weights: Iterable[Tuple[str,
|
594 | 596 | torch.Tensor]]) -> Set[str]:
|
595 | 597 | loader = AutoWeightsLoader(self)
|
596 | 598 | return loader.load_weights(weights)
|
| 599 | + |
| 600 | + def get_mm_mapping(self) -> MultiModelKeys: |
| 601 | + """ |
| 602 | + Get the module prefix in multimodal models |
| 603 | + """ |
| 604 | + return MultiModelKeys.from_string_field( |
| 605 | + language_model="language_model", |
| 606 | + connector="multi_modal_projector", |
| 607 | + tower_model="vision_tower") |
0 commit comments