diff --git a/QEfficient/cloud/compile.py b/QEfficient/cloud/compile.py
index 5f0b9140c..8b6da5b0b 100644
--- a/QEfficient/cloud/compile.py
+++ b/QEfficient/cloud/compile.py
@@ -85,29 +85,17 @@
 parser.add_argument(
     "--enable_qnn",
     "--enable-qnn",
-    nargs="?",
-    const=True,
-    type=str,
+    action="store_true",
     default=False,
     help="Enables QNN. Optionally, a configuration file can be provided with [--enable_qnn CONFIG_FILE].\
     If not provided, the default configuration will be used.\
     Sample Config: QEfficient/compile/qnn_config.json",
 )
-
-args, compiler_options = parser.parse_known_args()
-
-if isinstance(args.enable_qnn, str):
-    args.qnn_config = args.enable_qnn
-    args.enable_qnn = True
-
-compiler_options_dict = {}
-for i in range(0, len(compiler_options)):
-    if compiler_options[i].startswith("--"):
-        key = compiler_options[i].lstrip("-").replace("-", "_")
-        value = (
-            compiler_options[i + 1]
-            if i + 1 < len(compiler_options) and not compiler_options[i + 1].startswith("-")
-            else True
-        )
-        compiler_options_dict[key] = value
-QEfficient.compile(**args.__dict__, **compiler_options_dict)
+parser.add_argument(
+    "qnn_config",
+    nargs="?",
+    type=str,
+)
+# FIXME(ochougul): Allow extra compilation arguments
+args = parser.parse_args()
+QEfficient.compile(**vars(args))
diff --git a/QEfficient/cloud/finetune.py b/QEfficient/cloud/finetune.py
index c440e73c0..f312d00cb 100644
--- a/QEfficient/cloud/finetune.py
+++ b/QEfficient/cloud/finetune.py
@@ -7,7 +7,6 @@
 
 import random
 import warnings
-from typing import Any, Dict, Optional, Union
 
 import fire
 import numpy as np
@@ -18,9 +17,8 @@
 import torch.utils.data
 from peft import PeftModel, get_peft_model
 from torch.optim.lr_scheduler import StepLR
-from transformers import AutoModel, AutoModelForCausalLM, AutoTokenizer
 
-from QEfficient.finetune.configs.training import TrainConfig
+from QEfficient.finetune.configs.training import train_config as TRAIN_CONFIG
 from QEfficient.finetune.utils.config_utils import (
     generate_dataset_config,
     generate_peft_config,
@@ -34,81 +32,52 @@
 from QEfficient.finetune.utils.train_utils import get_longest_seq_length, print_model_size, train
 from QEfficient.utils._utils import login_and_download_hf_lm
 
-# Try importing QAIC-specific module, proceed without it if unavailable
 try:
     import torch_qaic  # noqa: F401
 except ImportError as e:
-    print(f"Warning: {e}. Proceeding without QAIC modules.")
+    print(f"Warning: {e}. Moving ahead without these qaic modules.")
 
-from transformers import AutoModelForSequenceClassification
+from transformers import AutoModelForCausalLM, AutoModelForSequenceClassification, AutoTokenizer
 
 # Suppress all warnings
 warnings.filterwarnings("ignore")
 
 
-def setup_distributed_training(train_config: TrainConfig) -> None:
-    """Initialize distributed training environment if enabled.
-
-    Args:
-        train_config (TrainConfig): Training configuration object.
-
-    Notes:
-        - If distributed data parallel (DDP) is disabled, this function does nothing.
-        - Ensures the device is not CPU and does not specify an index for DDP compatibility.
-        - Initializes the process group using the specified distributed backend.
-
-    Raises:
-        AssertionError: If device is CPU or includes an index with DDP enabled.
+def main(**kwargs):
     """
-    if not train_config.enable_ddp:
-        return
+    Helper function to finetune the model on QAic.
- torch_device = torch.device(train_config.device) - assert torch_device.type != "cpu", "Host doesn't support single-node DDP" - assert torch_device.index is None, f"DDP requires only device type, got: {torch_device}" + .. code-block:: bash - dist.init_process_group(backend=train_config.dist_backend) - # from here onward "qaic/cuda" will automatically map to "qaic:i/cuda:i", where i = process rank - getattr(torch, torch_device.type).set_device(dist.get_rank()) + python -m QEfficient.cloud.finetune OPTIONS + """ + # update the configuration for the training process + train_config = TRAIN_CONFIG() + update_config(train_config, **kwargs) + dataset_config = generate_dataset_config(train_config, kwargs) + device = train_config.device -def setup_seeds(seed: int) -> None: - """Set random seeds across libraries for reproducibility. + # dist init + if train_config.enable_ddp: + # TODO: may have to init qccl backend, next try run with torchrun command + torch_device = torch.device(device) + assert torch_device.type != "cpu", "Host doesn't support single-node DDP" + assert torch_device.index is None, ( + f"DDP requires specification of device type only, however provided device index as well: {torch_device}" + ) + dist.init_process_group(backend=train_config.dist_backend) + # from here onward "qaic/cuda" will automatically map to "qaic:i/cuda:i", where i = process rank + getattr(torch, torch_device.type).set_device(dist.get_rank()) - Args: - seed (int): Seed value to set for random number generators. + # Set the seeds for reproducibility + torch.manual_seed(train_config.seed) + random.seed(train_config.seed) + np.random.seed(train_config.seed) - Notes: - - Sets seeds for PyTorch, Python's random module, and NumPy. - """ - torch.manual_seed(seed) - random.seed(seed) - np.random.seed(seed) - - -def load_model_and_tokenizer( - train_config: TrainConfig, dataset_config: Any, peft_config_file: str, **kwargs -) -> tuple[AutoModelForCausalLM, AutoTokenizer]: - """Load the pre-trained model and tokenizer from Hugging Face. - - Args: - config (TrainConfig): Training configuration object containing model and tokenizer names. - dataset_config (Any): A dataclass object representing dataset configuration. - peft_config_file (str): Path to PEFT config file used for PEFT finetuning. - kwargs: Additional arguments to override PEFT config. - - Returns: - tuple: A tuple of two values. - - Model with pretrained weights loaded. - - Model's tokenizer (AutoTokenizer). - - Notes: - - Downloads the model if not already cached using login_and_download_hf_lm. - - Configures the model with FP16 precision and disables caching for training. - - Resizes model embeddings if tokenizer vocab size exceeds model embedding size. - - Sets pad_token_id to eos_token_id if not defined in the tokenizer. 
- """ + # Load the pre-trained model and setup its configuration + # config = AutoConfig.from_pretrained(train_config.model_name) pretrained_model_path = login_and_download_hf_lm(train_config.model_name) if train_config.task_type == "seq_classification": model = AutoModelForSequenceClassification.from_pretrained( @@ -135,6 +104,7 @@ def load_model_and_tokenizer( torch_dtype=torch.float16, ) + # Load the tokenizer and add special tokens tokenizer = AutoTokenizer.from_pretrained( train_config.model_name if train_config.tokenizer_name is None else train_config.tokenizer_name ) @@ -144,12 +114,14 @@ def load_model_and_tokenizer( # If there is a mismatch between tokenizer vocab size and embedding matrix, # throw a warning and then expand the embedding matrix if len(tokenizer) > model.get_input_embeddings().weight.shape[0]: - print("WARNING: Resizing embedding matrix to match tokenizer vocab size.") + print("WARNING: Resizing the embedding matrix to match the tokenizer vocab size.") model.resize_token_embeddings(len(tokenizer)) - # FIXME (Meet): Cover below line inside the logger once it is implemented. print_model_size(model, train_config) + # print the datatype of the model parameters + # print(get_parameter_dtypes(model)) + # Note: Need to call this before calling PeftModel.from_pretrained or get_peft_model. # Because, both makes model.is_gradient_checkpointing = True which is used in peft library to # apply gradient checkpointing related hooks to the input embeddings. Without this we will get @@ -162,70 +134,17 @@ def load_model_and_tokenizer( else: raise RuntimeError("Given model doesn't support gradient checkpointing. Please disable it and run it.") - model = apply_peft(model, train_config, peft_config_file, **kwargs) - - return model, tokenizer - - -def apply_peft( - model: AutoModel, train_config: TrainConfig, peft_config_file: Dict, **kwargs -) -> Union[AutoModel, PeftModel]: - """Apply Parameter-Efficient Fine-Tuning (PEFT) to the model if enabled. - - Args: - model (AutoModel): Huggingface model. - train_config (TrainConfig): Training configuration object. - peft_config_file (str, optional): Path to YAML/JSON file containing - PEFT (LoRA) config. Defaults to None. - kwargs: Additional arguments to override PEFT config params. + if train_config.use_peft: + # Load the pre-trained peft model checkpoint and setup its configuration + if train_config.from_peft_checkpoint: + model = PeftModel.from_pretrained(model, train_config.from_peft_checkpoint, is_trainable=True) + peft_config = model.peft_config + # Generate the peft config and start fine-tuning from original model + else: + peft_config = generate_peft_config(train_config, kwargs) + model = get_peft_model(model, peft_config) + model.print_trainable_parameters() - Returns: - Union[AutoModel, PeftModel]: If the use_peft in train_config is True - then PeftModel object is returned else original model object - (AutoModel) is returned. 
- """ - if not train_config.use_peft: - return model - - # Load the pre-trained peft model checkpoint and setup its configuration - if train_config.from_peft_checkpoint: - model = PeftModel.from_pretrained(model, train_config.from_peft_checkpoint, is_trainable=True) - peft_config = model.peft_config - # Generate the peft config and start fine-tuning from original model - else: - peft_config = generate_peft_config(train_config, peft_config_file, **kwargs) - model = get_peft_model(model, peft_config) - model.print_trainable_parameters() - - return model - - -def setup_dataloaders( - train_config: TrainConfig, - dataset_config: Any, - tokenizer: AutoTokenizer, -) -> tuple[torch.utils.data.DataLoader, Optional[torch.utils.data.DataLoader], int]: - """Set up training and validation DataLoaders. - - Args: - train_config (TrainConfig): Training configuration object. - dataset_config (Any): Configuration for the dataset (generated from train_config). - tokenizer (AutoTokenizer): Tokenizer for preprocessing data. - - Returns: - tuple: A tuple of three values. - - First value represents train_dataloader - - Second value represents eval_dataloader. It is None if - validation is disabled. - - Length of longest sequence in the dataset. - - Raises: - ValueError: If validation is enabled but the validation set is too small. - - Notes: - - Applies a custom data collator if provided by get_custom_data_collator. - - Configures DataLoader kwargs using get_dataloader_kwargs for train and val splits. - """ # Get the dataset utils dataset_processer = tokenizer @@ -245,8 +164,6 @@ def setup_dataloaders( ## train_dl_kwargs = get_dataloader_kwargs(train_config, dataset_train, dataset_processer, "train") print("length of dataset_train", len(dataset_train)) - - # FIXME (Meet): Add custom data collator registration from the outside by the user. custom_data_collator = get_custom_data_collator(dataset_processer, dataset_config) if custom_data_collator: print("custom_data_collator is used") @@ -291,66 +208,40 @@ def setup_dataloaders( else: longest_seq_length, _ = get_longest_seq_length(train_dataloader.dataset) - return train_dataloader, eval_dataloader, longest_seq_length - - -def main(peft_config_file: str = None, **kwargs) -> None: - """ - Fine-tune a model on QAIC hardware with configurable training and LoRA parameters. - - Args: - peft_config_file (str, optional): Path to YAML/JSON file containing PEFT (LoRA) config. Defaults to None. - kwargs: Additional arguments to override TrainConfig. - - Example: - .. 
code-block:: bash - - # Using a YAML config file for PEFT - python -m QEfficient.cloud.finetune \\ - --model_name "meta-llama/Llama-3.2-1B" \\ - --lr 5e-4 \\ - --peft_config_file "lora_config.yaml" - - # Using default LoRA config - python -m QEfficient.cloud.finetune \\ - --model_name "meta-llama/Llama-3.2-1B" \\ - --lr 5e-4 - """ - train_config = TrainConfig() - update_config(train_config, **kwargs) - dataset_config = generate_dataset_config(train_config.dataset) - update_config(dataset_config, **kwargs) - - setup_distributed_training(train_config) - setup_seeds(train_config.seed) - model, tokenizer = load_model_and_tokenizer(train_config, dataset_config, peft_config_file, **kwargs) - - # Create DataLoaders for the training and validation dataset - train_dataloader, eval_dataloader, longest_seq_length = setup_dataloaders(train_config, dataset_config, tokenizer) print( f"The longest sequence length in the train data is {longest_seq_length}, " f"passed context length is {train_config.context_length} and overall model's context length is " f"{model.config.max_position_embeddings}" ) - model.to(train_config.device) - optimizer = optim.AdamW(model.parameters(), lr=train_config.lr, weight_decay=train_config.weight_decay) + optimizer = optim.AdamW( + model.parameters(), + lr=train_config.lr, + weight_decay=train_config.weight_decay, + ) scheduler = StepLR(optimizer, step_size=1, gamma=train_config.gamma) + + # wrap model with DDP if train_config.enable_ddp: model = nn.parallel.DistributedDataParallel(model, device_ids=[dist.get_rank()]) - results = train( + + _ = train( model, - tokenizer, train_dataloader, eval_dataloader, + tokenizer, optimizer, scheduler, + train_config.gradient_accumulation_steps, train_config, + train_config.device, dist.get_rank() if train_config.enable_ddp else None, + None, ) + + # finalize torch distributed if train_config.enable_ddp: dist.destroy_process_group() - return results if __name__ == "__main__": diff --git a/QEfficient/cloud/infer.py b/QEfficient/cloud/infer.py index 30e67344a..68be72fa8 100644 --- a/QEfficient/cloud/infer.py +++ b/QEfficient/cloud/infer.py @@ -197,10 +197,6 @@ def main( **kwargs, ) - # If the io-encrypt flag is passed we will exit after QPC generation. 
- if kwargs.get("io_encrypt", None): - exit() - ######### # Execute ######### diff --git a/QEfficient/compile/compile_helper.py b/QEfficient/compile/compile_helper.py index 70a912cd7..5ce22bed9 100644 --- a/QEfficient/compile/compile_helper.py +++ b/QEfficient/compile/compile_helper.py @@ -64,6 +64,9 @@ def compile_kv_model_on_cloud_ai_100( DeprecationWarning, stacklevel=2, ) + if kwargs: + # FIXME + raise NotImplementedError("Can't handle extra compilation args now!") aic_binary_dir = os.path.join(base_path, "qpcs") if os.path.isdir(aic_binary_dir): @@ -108,13 +111,6 @@ def compile_kv_model_on_cloud_ai_100( with open(mdp_ts_config_path, "w") as file: json.dump(mdp_ts_config, file, indent=4) command.append(f"-mdp-load-partition-config={mdp_ts_config_path}") - for key, value in kwargs.items(): - option = "-" + key.replace("_", "-") - if isinstance(value, bool): - if value: - command.append(option) - continue - command.append(f"{option}={value}") print("Running AI 100 compiler:", " ".join(command)) result = subprocess.run(command, capture_output=True, text=True) if result.returncode != 0: @@ -225,13 +221,6 @@ def compile( allow_mxint8_mdp_io=allow_mxint8_mdp_io, mos=mos, device_group=device_group, - **kwargs, ) - if kwargs.get("io_encrypt", None): - logger.warning( - f"Compilation for IO-Encrypt has been successfully completed at path: {qpc_path}. However, Efficient-Transformers do not support IO-Encrypt execution. Please run the execution separately" - ) - else: - logger.info(f"Compiled QPC files can be found here: {qpc_path}") - + logger.info(f"Compiled QPC files can be found here: {qpc_path}") return qpc_path diff --git a/QEfficient/finetune/configs/peft_config.py b/QEfficient/finetune/configs/peft_config.py index a47774500..e2d018f05 100644 --- a/QEfficient/finetune/configs/peft_config.py +++ b/QEfficient/finetune/configs/peft_config.py @@ -9,24 +9,15 @@ from typing import List +# Currently, the support is for Lora Configs only +# In future, we can expand to llama_adapters and prefix tuning +# TODO: vbaddi: Check back once FSDP is enabled @dataclass -class LoraConfig: - """LoRA-specific configuration for parameter-efficient fine-tuning. - - Attributes: - r (int): LoRA rank (default: 8). - lora_alpha (int): LoRA scaling factor (default: 32). - target_modules (List[str]): Modules to apply LoRA to (default: ["q_proj", "v_proj"]). - bias (str): Bias handling in LoRA (default: "none"). - task_type (str): Task type for LoRA (default: "CAUSAL_LM"). - lora_dropout (float): Dropout rate for LoRA (default: 0.0). - inference_mode (bool): Whether model is in inference mode (default: False). - """ - +class lora_config: r: int = 8 lora_alpha: int = 32 target_modules: List[str] = field(default_factory=lambda: ["q_proj", "v_proj"]) - bias: str = "none" + bias = "none" task_type: str = "CAUSAL_LM" lora_dropout: float = 0.05 inference_mode: bool = False # should be False for finetuning @@ -34,6 +25,6 @@ class LoraConfig: # CAUTION prefix tuning is currently not supported @dataclass -class PrefixConfig: +class prefix_config: num_virtual_tokens: int = 30 task_type: str = "CAUSAL_LM" diff --git a/QEfficient/finetune/configs/training.py b/QEfficient/finetune/configs/training.py index 69b083b6a..c50954c4c 100644 --- a/QEfficient/finetune/configs/training.py +++ b/QEfficient/finetune/configs/training.py @@ -7,54 +7,8 @@ from dataclasses import dataclass -# Configuration Classes @dataclass -class TrainConfig: - """Training configuration for model fine-tuning. 
-
-    Attributes:
-        model_name (str): Name of the pre-trained model to fine-tune (default: "meta-llama/Llama-3.2-1B").
-        tokenizer_name (str): Name of the tokenizer (defaults to model_name if None).
-        run_validation (bool): Whether to run validation during training (default: True).
-        batch_size_training (int): Batch size for training (default: 1).
-        context_length (Optional[int]): Maximum sequence length for inputs (default: None).
-        gradient_accumulation_steps (int): Steps for gradient accumulation (default: 4).
-        gradient checkpointing (bool): Enable gradient checkpointing to save the memory by compromising the speed. (default: False).
-        num_epochs (int): Number of training epochs (default: 1).
-        max_train_step (int): Maximum training steps (default: 0, unlimited if 0).
-        max_eval_step (int): Maximum evaluation steps (default: 0, unlimited if 0).
-        device (str): Device to train on (default: "qaic").
-        num_workers_dataloader (int): Number of workers for data loading (default: 1).
-        lr (float): Learning rate (default: 3e-4).
-        weight_decay (float): Weight decay for optimizer (default: 0.0).
-        gamma (float): Learning rate decay factor (default: 0.85).
-        seed (int): Random seed for reproducibility (default: 42).
-        use_fp16 (bool): Use mixed precision training (default: True).
-        use_autocast (bool): Use autocast for mixed precision (default: True).
-        val_batch_size (int): Batch size for validation (default: 1).
-        dataset (str): Dataset name for training (default: "samsum_dataset").
-        task_type (str): Type of task for which the finetuning is to be done. Options: "generation" and "seq_classification". (default: "generation")
-        peft_method (str): Parameter-efficient fine-tuning method (default: "lora").
-        use_peft (bool): Whether to use PEFT (default: True).
-        from_peft_checkpoint (str): Path to PEFT checkpoint (default: "").
-        output_dir (str): Directory to save outputs (default: "meta-llama-samsum").
-        num_freeze_layers (int): Number of layers to freeze (default: 1).
-        one_qaic (bool): Use single QAIC device (default: False).
-        save_model (bool): Save the trained model (default: True).
-        save_metrics (bool): Save training metrics (default: True).
-        intermediate_step_save (int): Steps between intermediate saves (default: 1000).
-        batching_strategy (str): Batching strategy (default: "packing").
-        enable_sorting_for_ddp (bool): Sort data for DDP (default: True).
-        convergence_counter (int): Steps to check convergence (default: 5).
-        convergence_loss (float): Loss threshold for convergence (default: 1e-4).
-        use_profiler (bool): Enable profiling (default: False).
-        enable_ddp (bool): Enable distributed data parallel (default: False).
-        dist_backend (str): Backend for distributed training (default: "cpu:gloo,qaic:qccl,cuda:gloo").
-        grad_scaler (bool): Use gradient scaler (default: True).
-        dump_root_dir (str): Directory for mismatch dumps (default: "meta-llama-samsum-mismatches/step_").
-        opByOpVerifier (bool): Enable operation-by-operation verification (default: False).
- """ - +class train_config: model_name: str = "meta-llama/Llama-3.2-1B" tokenizer_name: str = None # if not passed as an argument, it uses the value of model_name run_validation: bool = True diff --git a/QEfficient/finetune/eval.py b/QEfficient/finetune/eval.py index 3fe6e0d81..918230554 100644 --- a/QEfficient/finetune/eval.py +++ b/QEfficient/finetune/eval.py @@ -11,6 +11,7 @@ import fire import numpy as np import torch +from configs.training import train_config as TRAIN_CONFIG from peft import AutoPeftModelForCausalLM from transformers import AutoModelForCausalLM, AutoTokenizer from utils.config_utils import ( @@ -24,8 +25,6 @@ ) from utils.train_utils import evaluation, print_model_size -from QEfficient.finetune.configs.training import TrainConfig - try: import torch_qaic # noqa: F401 @@ -40,7 +39,7 @@ def main(**kwargs): # update the configuration for the training process - train_config = TrainConfig() + train_config = TRAIN_CONFIG() update_config(train_config, **kwargs) # Set the seeds for reproducibility diff --git a/QEfficient/finetune/utils/config_utils.py b/QEfficient/finetune/utils/config_utils.py index c5c7fe615..e979961d6 100644 --- a/QEfficient/finetune/utils/config_utils.py +++ b/QEfficient/finetune/utils/config_utils.py @@ -4,39 +4,27 @@ # SPDX-License-Identifier: BSD-3-Clause # # ----------------------------------------------------------------------------- + import inspect -import json -import os from dataclasses import asdict -from typing import Any, Dict import torch.distributed as dist import torch.utils.data as data_utils -import yaml from peft import ( AdaptionPromptConfig, + LoraConfig, PrefixTuningConfig, ) -from peft import LoraConfig as PeftLoraConfig from transformers.data import DataCollatorForSeq2Seq import QEfficient.finetune.configs.dataset_config as datasets -from QEfficient.finetune.configs.peft_config import LoraConfig, PrefixConfig -from QEfficient.finetune.configs.training import TrainConfig +from QEfficient.finetune.configs.peft_config import lora_config, prefix_config +from QEfficient.finetune.configs.training import train_config from QEfficient.finetune.data.sampler import DistributedLengthBasedBatchSampler from QEfficient.finetune.dataset.dataset_config import DATASET_PREPROC def update_config(config, **kwargs): - """Update the attributes of a config object based on provided keyword arguments. - - Args: - config: The configuration object (e.g., TrainConfig, LoraConfig) or a list/tuple of such objects. - **kwargs: Keyword arguments representing attributes to update. - - Raises: - ValueError: If an unknown parameter is provided and the config type doesn't support nested updates. - """ if isinstance(config, (tuple, list)): for c in config: update_config(c, **kwargs) @@ -45,73 +33,40 @@ def update_config(config, **kwargs): if hasattr(config, k): setattr(config, k, v) elif "." in k: - config_name, param_name = k.split(".", 1) - if type(config).__name__.lower() == config_name.lower(): + # allow --some_config.some_param=True + config_name, param_name = k.split(".") + if type(config).__name__ == config_name: if hasattr(config, param_name): setattr(config, param_name, v) else: - raise ValueError(f"Config '{config_name}' does not have parameter: '{param_name}'") - else: - config_type = type(config).__name__ - # FIXME (Meet): Once logger is available put this in debug level. 
- print(f"[WARNING]: Unknown parameter '{k}' for config type '{config_type}'") + # In case of specialized config we can warn user + assert False, f"Warning: {config_name} does not accept parameter: {k}" + elif isinstance(config, train_config): + assert False, f"Warning: unknown parameter {k}" -def generate_peft_config(train_config: TrainConfig, peft_config_file: str = None, **kwargs) -> Any: - """Generate a PEFT-compatible configuration from a custom config based on peft_method. +def generate_peft_config(train_config, kwargs): + configs = (lora_config, prefix_config) + peft_configs = (LoraConfig, AdaptionPromptConfig, PrefixTuningConfig) + names = tuple(c.__name__.rstrip("_config") for c in configs) - Args: - train_config (TrainConfig): Training configuration with peft_method. - custom_config: Custom configuration object (e.g., LoraConfig). + if train_config.peft_method not in names: + raise RuntimeError(f"Peft config not found: {train_config.peft_method}") - Returns: - Any: A PEFT-specific configuration object (e.g., PeftLoraConfig). + config = configs[names.index(train_config.peft_method)]() - Raises: - RuntimeError: If the peft_method is not supported. - """ - if peft_config_file: - peft_config_data = load_config_file(peft_config_file) - validate_config(peft_config_data, config_type="lora") - peft_config = PeftLoraConfig(**peft_config_data) - else: - config_map = { - "lora": (LoraConfig, PeftLoraConfig), - "prefix": (PrefixConfig, PrefixTuningConfig), - "adaption_prompt": (None, AdaptionPromptConfig), - } - - if train_config.peft_method not in config_map: - raise RuntimeError(f"Peft config not found: {train_config.peft_method}") - - config_cls, peft_config_cls = config_map[train_config.peft_method] - if config_cls is None: - params = kwargs - else: - config = config_cls() - update_config(config, **kwargs) - params = asdict(config) + update_config(config, **kwargs) + params = asdict(config) + peft_config = peft_configs[names.index(train_config.peft_method)](**params) - peft_config = peft_config_cls(**params) return peft_config -def generate_dataset_config(dataset_name: str) -> Any: - """Generate a dataset configuration based on the specified dataset. - - Args: - dataset_name (str): Name of the dataset to be used for finetuning. - - Returns: - Any: A dataset configuration object. - - Raises: - AssertionError: If the dataset name is not recognized. - """ - supported_datasets = DATASET_PREPROC.keys() - assert dataset_name in supported_datasets, f"Given dataset '{dataset_name}' is not supported." - # FIXME (Meet): Replace below logic by creating using auto registry of datasets. - dataset_config = {k: v for k, v in inspect.getmembers(datasets)}[dataset_name]() +def generate_dataset_config(train_config, kwargs): + names = tuple(DATASET_PREPROC.keys()) + assert train_config.dataset in names, f"Unknown dataset: {train_config.dataset}" + dataset_config = {k: v for k, v in inspect.getmembers(datasets)}[train_config.dataset]() + update_config(dataset_config, **kwargs) return dataset_config @@ -143,84 +98,3 @@ def get_dataloader_kwargs(train_config, dataset, dataset_processer, mode): kwargs["drop_last"] = True kwargs["collate_fn"] = DataCollatorForSeq2Seq(dataset_processer) return kwargs - - -def validate_config(config_data: Dict[str, Any], config_type: str = "lora") -> None: - """Validate the provided YAML/JSON configuration for required fields and types. - - Args: - config_data (Dict[str, Any]): The configuration dictionary loaded from YAML/JSON. 
- config_type (str): Type of config to validate ("lora" for LoraConfig, default: "lora"). - - Raises: - ValueError: If required fields are missing or have incorrect types. - FileNotFoundError: If the config file path is invalid (handled upstream). - - Notes: - - Validates required fields for LoraConfig: r, lora_alpha, target_modules. - - Ensures types match expected values (int, float, list, etc.). - """ - if config_type.lower() != "lora": - raise ValueError(f"Unsupported config_type: {config_type}. Only 'lora' is supported.") - - required_fields = { - "r": int, - "lora_alpha": int, - "target_modules": list, - } - optional_fields = { - "bias": str, - "task_type": str, - "lora_dropout": float, - "inference_mode": bool, - } - - # Check for missing required fields - missing_fields = [field for field in required_fields if field not in config_data] - if missing_fields: - raise ValueError(f"Missing required fields in {config_type} config: {missing_fields}") - - # Validate types of required fields - for field, expected_type in required_fields.items(): - if not isinstance(config_data[field], expected_type): - raise ValueError( - f"Field '{field}' in {config_type} config must be of type {expected_type.__name__}, " - f"got {type(config_data[field]).__name__}" - ) - - # Validate target_modules contains strings - if not all(isinstance(mod, str) for mod in config_data["target_modules"]): - raise ValueError("All elements in 'target_modules' must be strings") - - # Validate types of optional fields if present - for field, expected_type in optional_fields.items(): - if field in config_data and not isinstance(config_data[field], expected_type): - raise ValueError( - f"Field '{field}' in {config_type} config must be of type {expected_type.__name__}, " - f"got {type(config_data[field]).__name__}" - ) - - -def load_config_file(config_path: str) -> Dict[str, Any]: - """Load a configuration from a YAML or JSON file. - - Args: - config_path (str): Path to the YAML or JSON file. - - Returns: - Dict[str, Any]: The loaded configuration as a dictionary. - - Raises: - FileNotFoundError: If the file does not exist. - ValueError: If the file format is unsupported. - """ - if not os.path.exists(config_path): - raise FileNotFoundError(f"Config file not found: {config_path}") - - with open(config_path, "r") as f: - if config_path.endswith(".yaml") or config_path.endswith(".yml"): - return yaml.safe_load(f) - elif config_path.endswith(".json"): - return json.load(f) - else: - raise ValueError("Unsupported config file format. 
Use .yaml, .yml, or .json") diff --git a/QEfficient/finetune/utils/train_utils.py b/QEfficient/finetune/utils/train_utils.py index 8693ae32d..2bc701008 100644 --- a/QEfficient/finetune/utils/train_utils.py +++ b/QEfficient/finetune/utils/train_utils.py @@ -18,7 +18,7 @@ from torch.utils.tensorboard import SummaryWriter from tqdm import tqdm -from QEfficient.finetune.configs.training import TrainConfig +from QEfficient.finetune.configs.training import train_config as TRAIN_CONFIG try: import torch_qaic # noqa: F401 @@ -34,31 +34,34 @@ def train( model, - tokenizer, train_dataloader, eval_dataloader, + tokenizer, optimizer, lr_scheduler, - train_config: TrainConfig, + gradient_accumulation_steps, + train_config: TRAIN_CONFIG, + device, local_rank=None, + rank=None, ): """ Trains the model on the given dataloader Args: model: The model to be trained - tokenizer: tokenizer used in the eval for decoding the predicitons train_dataloader: The dataloader containing the training data - eval_dataloader: The dataloader containing the eval data optimizer: The optimizer used for training lr_scheduler: The learning rate scheduler - train_config: The training configuration + gradient_accumulation_steps: The number of steps to accumulate gradients before performing a backward/update operation + num_epochs: The number of epochs to train for local_rank: The rank of the current node in a distributed setting + train_config: The training configuration + eval_dataloader: The dataloader containing the eval data + tokenizer: tokenizer used in the eval for decoding the predicitons Returns: results dictionary containing average training and validation perplexity and loss """ - device = train_config.device - train_metric = [] train_loss = [] val_metric = [] @@ -458,7 +461,7 @@ def evaluation_helper(model, train_config, eval_dataloader, device): # Print evaluation metrics print(f" {eval_metric.detach().cpu()=} {eval_epoch_loss.detach().cpu()=}") - return eval_epoch_loss, eval_metric, val_step_loss, val_step_metric + return eval_metric, eval_epoch_loss, val_step_loss, val_step_metric def get_longest_seq_length(data: List[Dict]) -> Tuple[int, int]: diff --git a/QEfficient/transformers/models/gemma3/modeling_gemma3.py b/QEfficient/transformers/models/gemma3/modeling_gemma3.py index 70601489d..58b837e9c 100644 --- a/QEfficient/transformers/models/gemma3/modeling_gemma3.py +++ b/QEfficient/transformers/models/gemma3/modeling_gemma3.py @@ -560,9 +560,16 @@ def __init__(self, model): self.model = model self.model.vision_model = self.model.vision_tower - def forward(self, pixel_values): + def forward(self, input_ids, pixel_values): + inputs_embeds = self.model.get_input_embeddings()(input_ids) + B, N, C = inputs_embeds.shape image_features = self.model.get_image_features(pixel_values=pixel_values) - return image_features + selected = input_ids == self.model.config.image_token_index + indices1 = selected.to(torch.int64).cumsum(1) - 1 + indices0 = torch.arange(selected.unsqueeze(0).shape[0]).view(-1, 1) + image_features_expanded = image_features.reshape(-1, C).unsqueeze(0)[indices0, indices1] + image_input_embeds = torch.where(selected.unsqueeze(-1), image_features_expanded, inputs_embeds) + return image_input_embeds class QEffGemma3DecoderWrapper(nn.Module): @@ -572,21 +579,14 @@ def __init__(self, model): self.language_model = self.model.language_model self.config = self.model.config - def forward(self, input_ids, vision_embeds, position_ids, index, past_key_values): - inputs_embeds = 
self.model.get_input_embeddings()(input_ids) - B, N, C = inputs_embeds.shape - selected = input_ids == self.model.config.image_token_index - indices1 = selected.to(torch.int64).cumsum(1) - 1 - indices1 = torch.where(indices1 != -1, indices1 + index, indices1) - indices0 = torch.arange(selected.unsqueeze(0).shape[0]).view(-1, 1) - image_features_expanded = vision_embeds.reshape(-1, C).unsqueeze(0)[indices0, indices1] - image_input_embeds = torch.where(selected.unsqueeze(-1), image_features_expanded, inputs_embeds) - inputs_embeds = torch.where(input_ids.shape[1] == torch.tensor(1), inputs_embeds, image_input_embeds) + def forward(self, input_ids, vision_embeds, position_ids, past_key_values): + image_embeds = vision_embeds[:, : input_ids.shape[1], :] + inputs_embeds = self.model.language_model.get_input_embeddings()(input_ids) + inputs_embeds = torch.where(input_ids.shape[1] == torch.tensor(1), inputs_embeds, image_embeds) outputs = self.model.language_model( inputs_embeds=inputs_embeds, position_ids=position_ids, past_key_values=past_key_values, use_cache=True ) - index = (indices1.max() + 1).unsqueeze(0).unsqueeze(0) - return outputs.logits, vision_embeds, index, outputs.past_key_values + return outputs.logits, vision_embeds, outputs.past_key_values class QEffGemma3ForConditionalGeneration(Gemma3ForConditionalGeneration): @@ -605,6 +605,11 @@ def get_specializations( kv_offload: bool = False, **compiler_options, ): + vision_seq_len = compiler_options.pop("vision_seq_len", None) + if vision_seq_len is None: + # TODO: Check properly for Gemma3, Not verified yet. + vision_seq_len = 512 # for Gemma3 Vision feature shape is (1, 4096, 1152) --> 1152 is hidden size) + prefill_seq_len = prefill_seq_len if prefill_seq_len else 32 ctx_len = ctx_len if ctx_len else constants.INTERN_CTX_LEN if img_size is None and hasattr(self.config.vision_config, "image_size"): @@ -612,13 +617,12 @@ def get_specializations( elif img_size is None: img_size = 896 # FIXME based on gemma3 Image size logger.warning("Setting img_size to be 336, as it was neither passed nor found in vision_config") - mm_tokens_per_image = getattr(self.config, "mm_tokens_per_image", 256) vision = [ { "batch_size": batch_size, "img_size": img_size, - "seq_len": prefill_seq_len, + "seq_len": vision_seq_len, "ctx_len": ctx_len, } ] @@ -628,14 +632,14 @@ def get_specializations( "seq_len": prefill_seq_len, "ctx_len": ctx_len, "img_size": img_size, - "mm_tokens_per_image": mm_tokens_per_image, + "chunk_length": prefill_seq_len, }, { "batch_size": batch_size, "seq_len": "1", "ctx_len": ctx_len, "img_size": img_size, - "mm_tokens_per_image": mm_tokens_per_image, + "chunk_length": prefill_seq_len, }, ] @@ -654,8 +658,9 @@ def get_onnx_dynamic_axes(self, kv_offload: bool = False): lang_dynamic_axes = {} lang_dynamic_axes["input_ids"] = {0: "batch_size", 1: "seq_len"} lang_dynamic_axes["position_ids"] = {0: "batch_size", 1: "seq_len"} - lang_dynamic_axes["vision_embeds"] = {0: "batch_size", 1: "mm_tokens_per_image"} + lang_dynamic_axes["vision_embeds"] = {0: "batch_size", 1: "chunk_length"} vision_dynamic_axes["pixel_values"] = {0: "batch_size", 2: "img_size", 3: "img_size"} + vision_dynamic_axes["input_ids"] = {0: "batch_size", 1: "seq_len"} pkv_dynamic_axes = {0: "batch_size", 2: "ctx_len"} for i in range(self.language_model.config.num_hidden_layers): @@ -680,7 +685,6 @@ def get_output_names(self, kv_offload: bool = False): output_names = {} if kv_offload: lang_output_names.insert(1, "vision_embeds_RetainedState") - lang_output_names.insert(2, 
"index_output") output_names["vision"] = vision_output_names output_names["lang"] = lang_output_names else: @@ -694,13 +698,12 @@ def get_dummy_inputs(self, kv_offload: bool = False): else: img_size = 896 - mm_tokens_per_image = getattr(self.config, "mm_tokens_per_image", 256) # Define shapes inputs_shapes = {} inputs_shapes["input_ids"] = (constants.ONNX_EXPORT_EXAMPLE_BATCH_SIZE, constants.ONNX_EXPORT_EXAMPLE_SEQ_LEN) inputs_shapes["vision_embeds"] = ( 1, # constants.INTERN_NUM_PATCHES, - mm_tokens_per_image, # constants.INTERN_FEATURE_SIZE, + constants.ONNX_EXPORT_EXAMPLE_SEQ_LEN, # constants.INTERN_FEATURE_SIZE, self.language_model.config.hidden_size, # 5120 ) inputs_shapes["position_ids"] = ( @@ -713,12 +716,12 @@ def get_dummy_inputs(self, kv_offload: bool = False): img_size, img_size, ) - inputs_shapes["index"] = (1, 1) # Define inputs vision_inputs = {} lang_inputs = {} vision_inputs["pixel_values"] = torch.zeros((inputs_shapes["pixel_values"]), dtype=torch.float32) + vision_inputs["input_ids"] = torch.zeros((inputs_shapes["input_ids"]), dtype=torch.int64) lang_inputs["input_ids"] = torch.zeros((inputs_shapes["input_ids"]), dtype=torch.int64) lang_inputs["vision_embeds"] = torch.zeros((inputs_shapes["vision_embeds"]), dtype=torch.float32) lang_inputs["position_ids"] = ( @@ -726,7 +729,7 @@ def get_dummy_inputs(self, kv_offload: bool = False): .view(1, constants.ONNX_EXPORT_EXAMPLE_SEQ_LEN) .repeat(constants.ONNX_EXPORT_EXAMPLE_BATCH_SIZE, 1) ) - lang_inputs["index"] = torch.zeros((inputs_shapes["index"]), dtype=torch.int64) + # Add data for KV kv_cache_shape = get_padding_shape_from_config( config=self.language_model.config, diff --git a/QEfficient/transformers/models/modeling_auto.py b/QEfficient/transformers/models/modeling_auto.py index ebfd529cc..1a9610187 100644 --- a/QEfficient/transformers/models/modeling_auto.py +++ b/QEfficient/transformers/models/modeling_auto.py @@ -751,8 +751,8 @@ def kv_offload_generate( input_len = inputs["attention_mask"].sum(1, keepdims=True) input_ids_length = inputs["input_ids"].shape[1] num_chunks = -(input_ids_length // -prefill_seq_len) # ceil divide without float - padded_len = num_chunks * prefill_seq_len # Convert to a multiple of prompt_len - + # padded_len = num_chunks * prefill_seq_len # Convert to a multiple of prompt_len + padded_len = vision_session.bindings[vision_session.binding_index_map["input_ids"]].dims[1] if generation_len is None: generation_len = ctx_len - input_len.max() assert generation_len > 0, "generation length should be greater than zero" @@ -783,11 +783,13 @@ def kv_offload_generate( } vision_inputs["pixel_values"] = vision_inputs["pixel_values"].astype("float16") + vision_inputs["input_ids"] = inputs["input_ids"] vision_start = perf_counter() vision_outputs = vision_session.run(vision_inputs) vision_end = perf_counter() lang_inputs = {k: v for k, v in inputs.items() if k not in vision_inputs} + lang_inputs["input_ids"] = inputs["input_ids"] lang_inputs["position_ids"] = np.where( lang_inputs.pop("attention_mask"), np.arange(padded_len), -1 ) # Need to use -1 as position_ids for invalid tokens @@ -795,27 +797,25 @@ def kv_offload_generate( vision_session.deactivate() lang_session.activate() lang_inputs["vision_embeds"] = vision_outputs["vision_embeds"] - lang_session.set_buffers(vision_outputs) + # lang_session.set_buffers(vision_outputs) prefill_start = perf_counter() # Run prefill - chunk_inputs = lang_inputs.copy() - chunk_inputs["index"] = np.array([[0]]) for i in range(num_chunks): + chunk_inputs = 
lang_inputs.copy() chunk_inputs["input_ids"] = lang_inputs["input_ids"][:, i * prefill_seq_len : (i + 1) * prefill_seq_len] chunk_inputs["position_ids"] = lang_inputs["position_ids"][ :, i * prefill_seq_len : (i + 1) * prefill_seq_len ] + chunk_inputs["vision_embeds"] = lang_inputs["vision_embeds"][ + :, i * prefill_seq_len : (i + 1) * prefill_seq_len + ] outputs = lang_session.run(chunk_inputs) - chunk_inputs["index"] = outputs["index_output"] prefill_time = perf_counter() - prefill_start + vision_end - vision_start + lang_inputs["vision_embeds"] = lang_inputs["vision_embeds"][:, :prefill_seq_len] # Skip inputs/outputs again lang_session.skip_buffers( - [ - x - for x in lang_session.input_names + lang_session.output_names - if x.startswith("past_") or x.endswith("_RetainedState") - ] + [x for x in lang_session.input_names + lang_session.output_names if x.startswith("past_")] ) # Get first token @@ -1643,11 +1643,6 @@ def compile( **compiler_options, ) - if compiler_options.get("io_encrypt", None): - logger.warning( - "Compilation for IO-Encrypt has been successfully completed. However, Efficient-Transformers do not support IO-Encrypt execution. Please run the execution separately with QPC compiled without io-encrypt." - ) - return qpc_path # FIXME: Update this method to match with transformers AutoModelForCausalLM.generate diff --git a/QEfficient/utils/_utils.py b/QEfficient/utils/_utils.py index 564bdd94d..b6af66be5 100644 --- a/QEfficient/utils/_utils.py +++ b/QEfficient/utils/_utils.py @@ -521,57 +521,27 @@ def __repr__(self): def dump_qconfig(func): def wrapper(self, *args, **kwargs): result = func(self, *args, **kwargs) - try: - create_and_dump_qconfigs( - self.qpc_path, - self.onnx_path, - self.get_model_config, - [cls.__name__ for cls in self._pytorch_transforms], - [cls.__name__ for cls in self._onnx_transforms], - kwargs.get("specializations"), - kwargs.get("mdp_ts_num_devices", 1), - kwargs.get("num_speculative_tokens"), - **{ - k: v - for k, v in kwargs.items() - if k - not in ["specializations", "mdp_ts_num_devices", "num_speculative_tokens", "custom_io", "onnx_path"] - }, - ) - except Exception as e: - print(f"An unexpected error occurred while dumping the qconfig: {e}") + create_and_dump_qconfigs( + self.qpc_path, + self.onnx_path, + self.get_model_config, + [cls.__name__ for cls in self._pytorch_transforms], + [cls.__name__ for cls in self._onnx_transforms], + kwargs.get("specializations"), + kwargs.get("mdp_ts_num_devices", 1), + kwargs.get("num_speculative_tokens"), + **{ + k: v + for k, v in kwargs.items() + if k + not in ["specializations", "mdp_ts_num_devices", "num_speculative_tokens", "custom_io", "onnx_path"] + }, + ) return result return wrapper -def get_qaic_sdk_version(qaic_sdk_xml_path: str) -> Optional[str]: - """ - Extracts the QAIC SDK version from the given SDK XML file. - - Args: - qaic_sdk_xml_path (str): Path to the SDK XML file. - Returns: - The SDK version as a string if found, otherwise None. 
- """ - qaic_sdk_version = None - - # Check and extract version from the given SDK XML file - if os.path.exists(qaic_sdk_xml_path): - try: - tree = ET.parse(qaic_sdk_xml_path) - root = tree.getroot() - base_version_element = root.find(".//base_version") - if base_version_element is not None: - qaic_sdk_version = base_version_element.text - except ET.ParseError as e: - print(f"Error parsing XML file {qaic_sdk_xml_path}: {e}") - except Exception as e: - print(f"An unexpected error occurred while processing {qaic_sdk_xml_path}: {e}") - - return qaic_sdk_version - - def create_and_dump_qconfigs( qpc_path, onnx_path, @@ -588,12 +558,29 @@ def create_and_dump_qconfigs( Such as huggingface configs, QEff transforms, QAIC sdk version, QNN sdk, compilation dir, qpc dir and many other compilation options. """ - enable_qnn = compiler_options.get("enable_qnn", False) - qnn_config_path = compiler_options.get("qnn_config", None) + qnn_config = compiler_options["qnn_config"] if "qnn_config" in compiler_options else None + enable_qnn = True if "qnn_config" in compiler_options else None + qconfig_file_path = os.path.join(os.path.dirname(qpc_path), "qconfig.json") onnx_path = str(onnx_path) specializations_file_path = str(os.path.join(os.path.dirname(qpc_path), "specializations.json")) compile_dir = str(os.path.dirname(qpc_path)) + qnn_config_path = ( + (qnn_config if qnn_config is not None else "QEfficient/compile/qnn_config.json") if enable_qnn else None + ) + + # Extract QAIC SDK Apps Version from SDK XML file + tree = ET.parse(Constants.SDK_APPS_XML) + root = tree.getroot() + qaic_version = root.find(".//base_version").text + + # Extract QNN SDK details from YAML file if the environment variable is set + qnn_sdk_details = None + qnn_sdk_path = os.getenv(QnnConstants.QNN_SDK_PATH_ENV_VAR_NAME) + if enable_qnn and qnn_sdk_path: + qnn_sdk_yaml_path = os.path.join(qnn_sdk_path, QnnConstants.QNN_SDK_YAML) + with open(qnn_sdk_yaml_path, "r") as file: + qnn_sdk_details = yaml.safe_load(file) # Ensure all objects in the configs dictionary are JSON serializable def make_serializable(obj): @@ -615,38 +602,29 @@ def make_serializable(obj): "onnx_transforms": make_serializable(onnx_transforms), "onnx_path": onnx_path, }, - "compiler_config": { - "enable_qnn": enable_qnn, - "compile_dir": compile_dir, - "specializations_file_path": specializations_file_path, - "specializations": make_serializable(specializations), - "mdp_ts_num_devices": mdp_ts_num_devices, - "num_speculative_tokens": num_speculative_tokens, - **compiler_options, - }, - "aic_sdk_config": { - "qaic_apps_version": get_qaic_sdk_version(Constants.SDK_APPS_XML), - "qaic_platform_version": get_qaic_sdk_version(Constants.SDK_PLATFORM_XML), - }, }, } + aic_compiler_config = { + "apps_sdk_version": qaic_version, + "compile_dir": compile_dir, + "specializations_file_path": specializations_file_path, + "specializations": make_serializable(specializations), + "mdp_ts_num_devices": mdp_ts_num_devices, + "num_speculative_tokens": num_speculative_tokens, + **compiler_options, + } + qnn_config = { + "enable_qnn": enable_qnn, + "qnn_config_path": qnn_config_path, + } + # Put AIC or qnn details. if enable_qnn: - qnn_sdk_path = os.getenv(QnnConstants.QNN_SDK_PATH_ENV_VAR_NAME) - if not qnn_sdk_path: - raise EnvironmentError( - f"QNN_SDK_PATH {qnn_sdk_path} is not set. 
Please set {QnnConstants.QNN_SDK_PATH_ENV_VAR_NAME}" - ) - qnn_sdk_yaml_path = os.path.join(qnn_sdk_path, QnnConstants.QNN_SDK_YAML) - qnn_sdk_details = load_yaml( - qnn_sdk_yaml_path - ) # Extract QNN SDK details from YAML file if the environment variable is set - qnn_config = { - "qnn_config_path": qnn_config_path, - } qconfigs["qpc_config"]["qnn_config"] = qnn_config if qnn_sdk_details: qconfigs["qpc_config"]["qnn_config"].update(qnn_sdk_details) + else: + qconfigs["qpc_config"]["aic_compiler_config"] = aic_compiler_config create_json(qconfig_file_path, qconfigs) diff --git a/QEfficient/utils/constants.py b/QEfficient/utils/constants.py index c8f74907a..b1ff9701e 100644 --- a/QEfficient/utils/constants.py +++ b/QEfficient/utils/constants.py @@ -97,10 +97,7 @@ class Constants: MAX_QPC_LIMIT = 30 MAX_RETRIES = 10 # This constant will be used set the maximum number of retry attempts for downloading a model using huggingface_hub snapshot_download NUM_SPECULATIVE_TOKENS = 2 - SDK_APPS_XML = "/opt/qti-aic/versions/apps.xml" # This xml file is parsed to find out the SDK apps version. - SDK_PLATFORM_XML = ( - "/opt/qti-aic/versions/platform.xml" # This xml file is parsed to find out the SDK platform version. - ) + SDK_APPS_XML = "/opt/qti-aic/versions/apps.xml" # This xml file is parsed to find out the SDK version. @dataclass diff --git a/scripts/Jenkinsfile b/scripts/Jenkinsfile index 7036d6f6d..fcd2fece5 100644 --- a/scripts/Jenkinsfile +++ b/scripts/Jenkinsfile @@ -171,4 +171,4 @@ pipeline { deleteDir() } } -} \ No newline at end of file +} diff --git a/scripts/finetune/run_ft_model.py b/scripts/finetune/run_ft_model.py index ef014923b..5e88db641 100644 --- a/scripts/finetune/run_ft_model.py +++ b/scripts/finetune/run_ft_model.py @@ -12,7 +12,7 @@ from peft import AutoPeftModelForCausalLM from transformers import AutoModelForCausalLM, AutoTokenizer -from QEfficient.finetune.configs.training import TrainConfig +from QEfficient.finetune.configs.training import train_config as TRAIN_CONFIG # Suppress all warnings warnings.filterwarnings("ignore") @@ -25,7 +25,7 @@ print(f"Warning: {e}. Moving ahead without these qaic modules.") device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") -train_config = TrainConfig() +train_config = TRAIN_CONFIG() model = AutoModelForCausalLM.from_pretrained( train_config.model_name, use_cache=False, diff --git a/tests/finetune/test_finetune.py b/tests/finetune/test_finetune.py index fb4a84dc0..45330cad6 100644 --- a/tests/finetune/test_finetune.py +++ b/tests/finetune/test_finetune.py @@ -8,7 +8,6 @@ import os import shutil -import numpy as np import pytest import torch.optim as optim from torch.utils.data import DataLoader @@ -23,25 +22,12 @@ def clean_up(path): shutil.rmtree(path) -configs = [ - pytest.param( - "meta-llama/Llama-3.2-1B", # model_name - 10, # max_eval_step - 20, # max_train_step - 1, # intermediate_step_save - None, # context_length - True, # run_validation - True, # use_peft - "qaic", # device - id="llama_config", # config name - ) -] +configs = [pytest.param("meta-llama/Llama-3.2-1B", 1, 1, 1, None, True, True, "cpu", id="llama_config")] -@pytest.mark.skip(reason="Currently CI is broken. 
Once it is fixed we will enable this test.") -@pytest.mark.cli +# TODO:enable this once docker is available @pytest.mark.on_qaic -@pytest.mark.finetune +@pytest.mark.skip(reason="eager docker not available in sdk") @pytest.mark.parametrize( "model_name,max_eval_step,max_train_step,intermediate_step_save,context_length,run_validation,use_peft,device", configs, @@ -57,7 +43,7 @@ def test_finetune( device, mocker, ): - train_config_spy = mocker.spy(QEfficient.cloud.finetune, "TrainConfig") + train_config_spy = mocker.spy(QEfficient.cloud.finetune, "TRAIN_CONFIG") generate_dataset_config_spy = mocker.spy(QEfficient.cloud.finetune, "generate_dataset_config") generate_peft_config_spy = mocker.spy(QEfficient.cloud.finetune, "generate_peft_config") get_dataloader_kwargs_spy = mocker.spy(QEfficient.cloud.finetune, "get_dataloader_kwargs") @@ -79,28 +65,23 @@ def test_finetune( "device": device, } - results = finetune(**kwargs) - assert np.allclose(results["avg_train_loss"], 0.00232327, atol=1e-5), "Train loss is not matching." - assert np.allclose(results["avg_train_metric"], 1.002326, atol=1e-5), "Train metric is not matching." - assert np.allclose(results["avg_eval_loss"], 0.0206124, atol=1e-5), "Eval loss is not matching." - assert np.allclose(results["avg_eval_metric"], 1.020826, atol=1e-5), "Eval metric is not matching." - assert results["avg_epoch_time"] < 60, "Training should complete within 60 seconds." + finetune(**kwargs) train_config_spy.assert_called_once() generate_dataset_config_spy.assert_called_once() generate_peft_config_spy.assert_called_once() + update_config_spy.assert_called_once() get_custom_data_collator_spy.assert_called_once() get_longest_seq_length_spy.assert_called_once() print_model_size_spy.assert_called_once() train_spy.assert_called_once() - assert update_config_spy.call_count == 2 assert get_dataloader_kwargs_spy.call_count == 2 assert get_preprocessed_dataset_spy.call_count == 2 args, kwargs = train_spy.call_args - train_dataloader = args[2] - eval_dataloader = args[3] + train_dataloader = args[1] + eval_dataloader = args[2] optimizer = args[4] batch = next(iter(train_dataloader)) @@ -116,19 +97,12 @@ def test_finetune( else: assert eval_dataloader is None - args, kwargs = update_config_spy.call_args_list[0] + args, kwargs = update_config_spy.call_args train_config = args[0] - assert max_train_step >= train_config.gradient_accumulation_steps, ( - "Total training step should be more than " - f"{train_config.gradient_accumulation_steps} which is gradient accumulation steps." 
- ) - saved_file = os.path.join(train_config.output_dir, "complete_epoch_1/adapter_model.safetensors") + saved_file = os.path.join(train_config.output_dir, "adapter_model.safetensors") assert os.path.isfile(saved_file) clean_up(train_config.output_dir) clean_up("runs") clean_up(train_config.dump_root_dir) - - -# TODO (Meet): Add seperate tests for BERT FT and LLama FT diff --git a/tests/transformers/spd/test_pld_inference.py b/tests/transformers/spd/test_pld_inference.py index 71b4e01cd..c80fe5969 100644 --- a/tests/transformers/spd/test_pld_inference.py +++ b/tests/transformers/spd/test_pld_inference.py @@ -262,7 +262,7 @@ def test_pld_spec_decode_inference( num_speculative_tokens=num_speculative_tokens, ) # init qaic session - target_model_session = QAICInferenceSession(target_model_qpc_path) + target_model_session = QAICInferenceSession(target_model_qpc_path, device_ids=device_group) draft_model_session = None # skip inputs/outputs buffers @@ -453,7 +453,7 @@ def test_pld_spec_decode_inference( del draft_model_session generated_ids = np.asarray(generated_ids[0]).flatten() gen_len = generated_ids.shape[0] - exec_info = target_model.generate(tokenizer, Constants.INPUT_STR) + exec_info = target_model.generate(tokenizer, Constants.INPUT_STR, device_group) cloud_ai_100_tokens = exec_info.generated_ids[0][ :gen_len ] # Because we always run for single input and single batch size diff --git a/tests/transformers/spd/test_spd_inference.py b/tests/transformers/spd/test_spd_inference.py index e87c51d5f..6f6bdb268 100644 --- a/tests/transformers/spd/test_spd_inference.py +++ b/tests/transformers/spd/test_spd_inference.py @@ -157,8 +157,8 @@ def test_spec_decode_inference( full_batch_size=full_batch_size, ) # init qaic session - target_model_session = QAICInferenceSession(target_model_qpc_path) - draft_model_session = QAICInferenceSession(draft_model_qpc_path) + target_model_session = QAICInferenceSession(target_model_qpc_path, device_ids=device_group) + draft_model_session = QAICInferenceSession(draft_model_qpc_path, device_ids=device_group) # skip inputs/outputs buffers target_model_session.skip_buffers(set([x for x in target_model_session.input_names if x.startswith("past_")])) @@ -341,7 +341,7 @@ def test_spec_decode_inference( del draft_model_session generated_ids = np.asarray(generated_ids[0]).flatten() gen_len = generated_ids.shape[0] - exec_info = draft_model.generate(tokenizer, Constants.INPUT_STR) + exec_info = draft_model.generate(tokenizer, Constants.INPUT_STR, device_group) cloud_ai_100_tokens = exec_info.generated_ids[0][ :gen_len ] # Because we always run for single input and single batch size
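
The two speculative-decoding tests above now pin both the target and draft sessions to an explicit device group and pass the same group to generate(). A minimal usage sketch of that pattern follows; the QPC path and device IDs are hypothetical placeholders, and the import path is assumed from the QEfficient package layout.

# Sketch only: pinning an inference session to an explicit Cloud AI 100 device group,
# mirroring the device_ids plumbing added in the test changes above.
from QEfficient.generation.cloud_infer import QAICInferenceSession  # assumed import path

device_group = [0]  # hypothetical: a single-device group
session = QAICInferenceSession("path/to/qpcs", device_ids=device_group)  # QPC path is a placeholder

# Keep KV-cache buffers on device between calls, as the tests do.
session.skip_buffers(set(x for x in session.input_names if x.startswith("past_")))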