"Gemma3" #403

Closed
wants to merge 1 commit into from
30 changes: 9 additions & 21 deletions QEfficient/cloud/compile.py
@@ -85,29 +85,17 @@
parser.add_argument(
"--enable_qnn",
"--enable-qnn",
nargs="?",
const=True,
type=str,
action="store_true",
default=False,
help="Enables QNN. Optionally, a configuration file can be provided with [--enable_qnn CONFIG_FILE].\
If not provided, the default configuration will be used.\
Sample Config: QEfficient/compile/qnn_config.json",
)

args, compiler_options = parser.parse_known_args()

if isinstance(args.enable_qnn, str):
args.qnn_config = args.enable_qnn
args.enable_qnn = True

compiler_options_dict = {}
for i in range(0, len(compiler_options)):
if compiler_options[i].startswith("--"):
key = compiler_options[i].lstrip("-").replace("-", "_")
value = (
compiler_options[i + 1]
if i + 1 < len(compiler_options) and not compiler_options[i + 1].startswith("-")
else True
)
compiler_options_dict[key] = value
QEfficient.compile(**args.__dict__, **compiler_options_dict)
parser.add_argument(
"qnn_config",
nargs="?",
type=str,
)
# FIXME(ochougul): Allow extra compilation arguments
args = parser.parse_args()
QEfficient.compile(**vars(args))
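For reference, a minimal standalone sketch of how the new argument handling behaves (standard-library argparse only; the config file name used in the parse calls is a hypothetical example):

import argparse

parser = argparse.ArgumentParser(description="Sketch of the new --enable_qnn handling")
parser.add_argument(
    "--enable_qnn",
    "--enable-qnn",
    action="store_true",
    default=False,
    help="Enables QNN compilation.",
)
parser.add_argument(
    "qnn_config",
    nargs="?",
    type=str,
    help="Optional path to a QNN config JSON, e.g. QEfficient/compile/qnn_config.json",
)

# "--enable_qnn my_qnn_config.json" -> enable_qnn=True, qnn_config="my_qnn_config.json"
print(parser.parse_args(["--enable_qnn", "my_qnn_config.json"]))
# "--enable_qnn" alone -> enable_qnn=True, qnn_config=None
print(parser.parse_args(["--enable_qnn"]))

With the previous definition (nargs="?", const=True, type=str) the flag's value could be either a bool or a config path, which is why the follow-up isinstance check was needed; action="store_true" plus a separate positional argument removes that branching. The parse_known_args loop that forwarded arbitrary extra compiler flags is dropped here, which is what the FIXME about extra compilation arguments refers to.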
229 changes: 60 additions & 169 deletions QEfficient/cloud/finetune.py
@@ -7,7 +7,6 @@

import random
import warnings
from typing import Any, Dict, Optional, Union

import fire
import numpy as np
@@ -18,9 +17,8 @@
import torch.utils.data
from peft import PeftModel, get_peft_model
from torch.optim.lr_scheduler import StepLR
from transformers import AutoModel, AutoModelForCausalLM, AutoTokenizer

from QEfficient.finetune.configs.training import TrainConfig
from QEfficient.finetune.configs.training import train_config as TRAIN_CONFIG
from QEfficient.finetune.utils.config_utils import (
generate_dataset_config,
generate_peft_config,
@@ -34,81 +32,52 @@
from QEfficient.finetune.utils.train_utils import get_longest_seq_length, print_model_size, train
from QEfficient.utils._utils import login_and_download_hf_lm

# Try importing QAIC-specific module, proceed without it if unavailable
try:
import torch_qaic # noqa: F401
except ImportError as e:
print(f"Warning: {e}. Proceeding without QAIC modules.")
print(f"Warning: {e}. Moving ahead without these qaic modules.")


from transformers import AutoModelForSequenceClassification
from transformers import AutoModelForCausalLM, AutoModelForSequenceClassification, AutoTokenizer

# Suppress all warnings
warnings.filterwarnings("ignore")


def setup_distributed_training(train_config: TrainConfig) -> None:
"""Initialize distributed training environment if enabled.

Args:
train_config (TrainConfig): Training configuration object.

Notes:
- If distributed data parallel (DDP) is disabled, this function does nothing.
- Ensures the device is not CPU and does not specify an index for DDP compatibility.
- Initializes the process group using the specified distributed backend.

Raises:
AssertionError: If device is CPU or includes an index with DDP enabled.
def main(**kwargs):
"""
if not train_config.enable_ddp:
return
Helper function to finetune the model on QAic.

torch_device = torch.device(train_config.device)
assert torch_device.type != "cpu", "Host doesn't support single-node DDP"
assert torch_device.index is None, f"DDP requires only device type, got: {torch_device}"
.. code-block:: bash

dist.init_process_group(backend=train_config.dist_backend)
# from here onward "qaic/cuda" will automatically map to "qaic:i/cuda:i", where i = process rank
getattr(torch, torch_device.type).set_device(dist.get_rank())
python -m QEfficient.cloud.finetune OPTIONS

"""
# update the configuration for the training process
train_config = TRAIN_CONFIG()
update_config(train_config, **kwargs)
dataset_config = generate_dataset_config(train_config, kwargs)
device = train_config.device

def setup_seeds(seed: int) -> None:
"""Set random seeds across libraries for reproducibility.
# dist init
if train_config.enable_ddp:
# TODO: may have to init qccl backend, next try run with torchrun command
torch_device = torch.device(device)
assert torch_device.type != "cpu", "Host doesn't support single-node DDP"
assert torch_device.index is None, (
f"DDP requires specification of device type only, however provided device index as well: {torch_device}"
)
dist.init_process_group(backend=train_config.dist_backend)
# from here onward "qaic/cuda" will automatically map to "qaic:i/cuda:i", where i = process rank
getattr(torch, torch_device.type).set_device(dist.get_rank())

Args:
seed (int): Seed value to set for random number generators.
# Set the seeds for reproducibility
torch.manual_seed(train_config.seed)
random.seed(train_config.seed)
np.random.seed(train_config.seed)

Notes:
- Sets seeds for PyTorch, Python's random module, and NumPy.
"""
torch.manual_seed(seed)
random.seed(seed)
np.random.seed(seed)


def load_model_and_tokenizer(
train_config: TrainConfig, dataset_config: Any, peft_config_file: str, **kwargs
) -> tuple[AutoModelForCausalLM, AutoTokenizer]:
"""Load the pre-trained model and tokenizer from Hugging Face.

Args:
config (TrainConfig): Training configuration object containing model and tokenizer names.
dataset_config (Any): A dataclass object representing dataset configuration.
peft_config_file (str): Path to PEFT config file used for PEFT finetuning.
kwargs: Additional arguments to override PEFT config.

Returns:
tuple: A tuple of two values.
- Model with pretrained weights loaded.
- Model's tokenizer (AutoTokenizer).

Notes:
- Downloads the model if not already cached using login_and_download_hf_lm.
- Configures the model with FP16 precision and disables caching for training.
- Resizes model embeddings if tokenizer vocab size exceeds model embedding size.
- Sets pad_token_id to eos_token_id if not defined in the tokenizer.
"""
# Load the pre-trained model and setup its configuration
# config = AutoConfig.from_pretrained(train_config.model_name)
pretrained_model_path = login_and_download_hf_lm(train_config.model_name)
if train_config.task_type == "seq_classification":
model = AutoModelForSequenceClassification.from_pretrained(
@@ -135,6 +104,7 @@ def load_model_and_tokenizer(
torch_dtype=torch.float16,
)

# Load the tokenizer and add special tokens
tokenizer = AutoTokenizer.from_pretrained(
train_config.model_name if train_config.tokenizer_name is None else train_config.tokenizer_name
)
@@ -144,12 +114,14 @@ def load_model_and_tokenizer(
# If there is a mismatch between tokenizer vocab size and embedding matrix,
# throw a warning and then expand the embedding matrix
if len(tokenizer) > model.get_input_embeddings().weight.shape[0]:
print("WARNING: Resizing embedding matrix to match tokenizer vocab size.")
print("WARNING: Resizing the embedding matrix to match the tokenizer vocab size.")
model.resize_token_embeddings(len(tokenizer))

# FIXME (Meet): Cover below line inside the logger once it is implemented.
print_model_size(model, train_config)

# print the datatype of the model parameters
# print(get_parameter_dtypes(model))

# Note: Need to call this before calling PeftModel.from_pretrained or get_peft_model.
# Because, both makes model.is_gradient_checkpointing = True which is used in peft library to
# apply gradient checkpointing related hooks to the input embeddings. Without this we will get
@@ -162,70 +134,17 @@ def load_model_and_tokenizer(
else:
raise RuntimeError("Given model doesn't support gradient checkpointing. Please disable it and run it.")

model = apply_peft(model, train_config, peft_config_file, **kwargs)

return model, tokenizer


def apply_peft(
model: AutoModel, train_config: TrainConfig, peft_config_file: Dict, **kwargs
) -> Union[AutoModel, PeftModel]:
"""Apply Parameter-Efficient Fine-Tuning (PEFT) to the model if enabled.

Args:
model (AutoModel): Huggingface model.
train_config (TrainConfig): Training configuration object.
peft_config_file (str, optional): Path to YAML/JSON file containing
PEFT (LoRA) config. Defaults to None.
kwargs: Additional arguments to override PEFT config params.
if train_config.use_peft:
# Load the pre-trained peft model checkpoint and setup its configuration
if train_config.from_peft_checkpoint:
model = PeftModel.from_pretrained(model, train_config.from_peft_checkpoint, is_trainable=True)
peft_config = model.peft_config
# Generate the peft config and start fine-tuning from original model
else:
peft_config = generate_peft_config(train_config, kwargs)
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

Returns:
Union[AutoModel, PeftModel]: If the use_peft in train_config is True
then PeftModel object is returned else original model object
(AutoModel) is returned.
"""
if not train_config.use_peft:
return model

# Load the pre-trained peft model checkpoint and setup its configuration
if train_config.from_peft_checkpoint:
model = PeftModel.from_pretrained(model, train_config.from_peft_checkpoint, is_trainable=True)
peft_config = model.peft_config
# Generate the peft config and start fine-tuning from original model
else:
peft_config = generate_peft_config(train_config, peft_config_file, **kwargs)
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

return model


def setup_dataloaders(
train_config: TrainConfig,
dataset_config: Any,
tokenizer: AutoTokenizer,
) -> tuple[torch.utils.data.DataLoader, Optional[torch.utils.data.DataLoader], int]:
"""Set up training and validation DataLoaders.

Args:
train_config (TrainConfig): Training configuration object.
dataset_config (Any): Configuration for the dataset (generated from train_config).
tokenizer (AutoTokenizer): Tokenizer for preprocessing data.

Returns:
tuple: A tuple of three values.
- First value represents train_dataloader
- Second value represents eval_dataloader. It is None if
validation is disabled.
- Length of longest sequence in the dataset.

Raises:
ValueError: If validation is enabled but the validation set is too small.

Notes:
- Applies a custom data collator if provided by get_custom_data_collator.
- Configures DataLoader kwargs using get_dataloader_kwargs for train and val splits.
"""
# Get the dataset utils
dataset_processer = tokenizer

@@ -245,8 +164,6 @@ def setup_dataloaders(
##
train_dl_kwargs = get_dataloader_kwargs(train_config, dataset_train, dataset_processer, "train")
print("length of dataset_train", len(dataset_train))

# FIXME (Meet): Add custom data collator registration from the outside by the user.
custom_data_collator = get_custom_data_collator(dataset_processer, dataset_config)
if custom_data_collator:
print("custom_data_collator is used")
@@ -291,66 +208,40 @@ def setup_dataloaders(
else:
longest_seq_length, _ = get_longest_seq_length(train_dataloader.dataset)

return train_dataloader, eval_dataloader, longest_seq_length


def main(peft_config_file: str = None, **kwargs) -> None:
"""
Fine-tune a model on QAIC hardware with configurable training and LoRA parameters.

Args:
peft_config_file (str, optional): Path to YAML/JSON file containing PEFT (LoRA) config. Defaults to None.
kwargs: Additional arguments to override TrainConfig.

Example:
.. code-block:: bash

# Using a YAML config file for PEFT
python -m QEfficient.cloud.finetune \\
--model_name "meta-llama/Llama-3.2-1B" \\
--lr 5e-4 \\
--peft_config_file "lora_config.yaml"

# Using default LoRA config
python -m QEfficient.cloud.finetune \\
--model_name "meta-llama/Llama-3.2-1B" \\
--lr 5e-4
"""
train_config = TrainConfig()
update_config(train_config, **kwargs)
dataset_config = generate_dataset_config(train_config.dataset)
update_config(dataset_config, **kwargs)

setup_distributed_training(train_config)
setup_seeds(train_config.seed)
model, tokenizer = load_model_and_tokenizer(train_config, dataset_config, peft_config_file, **kwargs)

# Create DataLoaders for the training and validation dataset
train_dataloader, eval_dataloader, longest_seq_length = setup_dataloaders(train_config, dataset_config, tokenizer)
print(
f"The longest sequence length in the train data is {longest_seq_length}, "
f"passed context length is {train_config.context_length} and overall model's context length is "
f"{model.config.max_position_embeddings}"
)

model.to(train_config.device)
optimizer = optim.AdamW(model.parameters(), lr=train_config.lr, weight_decay=train_config.weight_decay)
optimizer = optim.AdamW(
model.parameters(),
lr=train_config.lr,
weight_decay=train_config.weight_decay,
)
scheduler = StepLR(optimizer, step_size=1, gamma=train_config.gamma)

# wrap model with DDP
if train_config.enable_ddp:
model = nn.parallel.DistributedDataParallel(model, device_ids=[dist.get_rank()])
results = train(

_ = train(
model,
tokenizer,
train_dataloader,
eval_dataloader,
tokenizer,
optimizer,
scheduler,
train_config.gradient_accumulation_steps,
train_config,
train_config.device,
dist.get_rank() if train_config.enable_ddp else None,
None,
)

# finalize torch distributed
if train_config.enable_ddp:
dist.destroy_process_group()
return results


if __name__ == "__main__":
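For context on the PEFT branch exercised in this file, a minimal standalone sketch of the non-checkpoint path (the model name is taken from the docstring example above; the LoRA hyperparameters are illustrative assumptions, not QEfficient defaults):

import torch
from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.2-1B",
    torch_dtype=torch.float16,
    use_cache=False,
)

# Enable gradient checkpointing before applying PEFT, so the checkpointing
# hooks also cover the input embeddings (see the note above about calling this
# before PeftModel.from_pretrained / get_peft_model).
model.gradient_checkpointing_enable()

lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    target_modules=["q_proj", "v_proj"],
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # only the LoRA adapter weights are trainable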
4 changes: 0 additions & 4 deletions QEfficient/cloud/infer.py
@@ -197,10 +197,6 @@ def main(
**kwargs,
)

# If the io-encrypt flag is passed we will exit after QPC generation.
if kwargs.get("io_encrypt", None):
exit()

#########
# Execute
#########