diff --git a/QEfficient/cloud/compile.py b/QEfficient/cloud/compile.py index 8b6da5b0b..5f0b9140c 100644 --- a/QEfficient/cloud/compile.py +++ b/QEfficient/cloud/compile.py @@ -85,17 +85,29 @@ parser.add_argument( "--enable_qnn", "--enable-qnn", - action="store_true", + nargs="?", + const=True, + type=str, default=False, help="Enables QNN. Optionally, a configuration file can be provided with [--enable_qnn CONFIG_FILE].\ If not provided, the default configuration will be used.\ Sample Config: QEfficient/compile/qnn_config.json", ) - parser.add_argument( - "qnn_config", - nargs="?", - type=str, - ) - # FIXME(ochougul): Allow extra compilation arguments - args = parser.parse_args() - QEfficient.compile(**vars(args)) + + args, compiler_options = parser.parse_known_args() + + if isinstance(args.enable_qnn, str): + args.qnn_config = args.enable_qnn + args.enable_qnn = True + + compiler_options_dict = {} + for i in range(0, len(compiler_options)): + if compiler_options[i].startswith("--"): + key = compiler_options[i].lstrip("-").replace("-", "_") + value = ( + compiler_options[i + 1] + if i + 1 < len(compiler_options) and not compiler_options[i + 1].startswith("-") + else True + ) + compiler_options_dict[key] = value + QEfficient.compile(**args.__dict__, **compiler_options_dict) diff --git a/QEfficient/cloud/infer.py b/QEfficient/cloud/infer.py index 68be72fa8..30e67344a 100644 --- a/QEfficient/cloud/infer.py +++ b/QEfficient/cloud/infer.py @@ -197,6 +197,10 @@ def main( **kwargs, ) + # If the io-encrypt flag is passed we will exit after QPC generation. + if kwargs.get("io_encrypt", None): + exit() + ######### # Execute ######### diff --git a/QEfficient/compile/compile_helper.py b/QEfficient/compile/compile_helper.py index 5ce22bed9..70a912cd7 100644 --- a/QEfficient/compile/compile_helper.py +++ b/QEfficient/compile/compile_helper.py @@ -64,9 +64,6 @@ def compile_kv_model_on_cloud_ai_100( DeprecationWarning, stacklevel=2, ) - if kwargs: - # FIXME - raise NotImplementedError("Can't handle extra compilation args now!") aic_binary_dir = os.path.join(base_path, "qpcs") if os.path.isdir(aic_binary_dir): @@ -111,6 +108,13 @@ def compile_kv_model_on_cloud_ai_100( with open(mdp_ts_config_path, "w") as file: json.dump(mdp_ts_config, file, indent=4) command.append(f"-mdp-load-partition-config={mdp_ts_config_path}") + for key, value in kwargs.items(): + option = "-" + key.replace("_", "-") + if isinstance(value, bool): + if value: + command.append(option) + continue + command.append(f"{option}={value}") print("Running AI 100 compiler:", " ".join(command)) result = subprocess.run(command, capture_output=True, text=True) if result.returncode != 0: @@ -221,6 +225,13 @@ def compile( allow_mxint8_mdp_io=allow_mxint8_mdp_io, mos=mos, device_group=device_group, + **kwargs, ) - logger.info(f"Compiled QPC files can be found here: {qpc_path}") + if kwargs.get("io_encrypt", None): + logger.warning( + f"Compilation for IO-Encrypt has been successfully completed at path: {qpc_path}. However, Efficient-Transformers do not support IO-Encrypt execution. Please run the execution separately" + ) + else: + logger.info(f"Compiled QPC files can be found here: {qpc_path}") + return qpc_path diff --git a/QEfficient/transformers/models/modeling_auto.py b/QEfficient/transformers/models/modeling_auto.py index 6b5deb8db..f181ee5eb 100644 --- a/QEfficient/transformers/models/modeling_auto.py +++ b/QEfficient/transformers/models/modeling_auto.py @@ -1635,6 +1635,11 @@ def compile( **compiler_options, ) + if compiler_options.get("io_encrypt", None): + logger.warning( + "Compilation for IO-Encrypt has been successfully completed. However, Efficient-Transformers do not support IO-Encrypt execution. Please run the execution separately with QPC compiled without io-encrypt." + ) + return qpc_path # FIXME: Update this method to match with transformers AutoModelForCausalLM.generate