From e4f309ce7295b91ee970b4beee10691afba685a9 Mon Sep 17 00:00:00 2001 From: Bang Xiao Date: Mon, 27 Sep 2021 10:20:54 +0800 Subject: [PATCH] Fix "cannot find /tmp/xxx/yyy.tar.gz" error when Spark uses cluster deploy-mode: the run_path is created on the submitting host rather than on the host where the driver runs, so create it before writing the package tarball --- luigi/contrib/spark.py | 1 + 1 file changed, 1 insertion(+) diff --git a/luigi/contrib/spark.py b/luigi/contrib/spark.py index 28950828ea..668b7ba19a 100644 --- a/luigi/contrib/spark.py +++ b/luigi/contrib/spark.py @@ -356,6 +356,7 @@ def _setup_packages(self, sc): mod_path = mod.__path__[0] except AttributeError: mod_path = mod.__file__ + os.makedirs(self.run_path, exist_ok=True) tar_path = os.path.join(self.run_path, package + '.tar.gz') tar = tarfile.open(tar_path, "w:gz") tar.add(mod_path, os.path.basename(mod_path))