Skip to content

Commit fb6b807

Browse files
feat: Support GCS filesystem for bytewax engine (feast-dev#3774)
* fix: Support param timeout when persisting

  Signed-off-by: Hai Nguyen <quanghai.ng1512@gmail.com>

* fix: fix java integration test

  Signed-off-by: Hai Nguyen <quanghai.ng1512@gmail.com>

---------

Signed-off-by: Hai Nguyen <quanghai.ng1512@gmail.com>
1 parent f05a6e7 commit fb6b807

File tree

2 files changed

+3
-4
lines changed

2 files changed

+3
-4
lines changed

sdk/python/feast/infra/materialization/contrib/bytewax/bytewax_materialization_dataflow.py

+1 -3
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,6 @@
 
 import pyarrow as pa
 import pyarrow.parquet as pq
-import s3fs
 from bytewax.dataflow import Dataflow # type: ignore
 from bytewax.execution import cluster_main
 from bytewax.inputs import ManualInputConfig, distribute
@@ -29,8 +28,7 @@ def __init__(
         self._run_dataflow()
 
     def process_path(self, path):
-        fs = s3fs.S3FileSystem()
-        dataset = pq.ParquetDataset(path, filesystem=fs, use_legacy_dataset=False)
+        dataset = pq.ParquetDataset(path, use_legacy_dataset=False)
         batches = []
         for fragment in dataset.fragments:
             for batch in fragment.to_table().to_batches():

setup.py

+2 -1
Original file line number | Diff line number | Diff line change
@@ -91,6 +91,7 @@
     "google-cloud-datastore>=2.1.0,<3",
     "google-cloud-storage>=1.34.0,<3",
     "google-cloud-bigtable>=2.11.0,<3",
+    "gcsfs",
 ]
 
 REDIS_REQUIRED = [
@@ -158,7 +159,7 @@
     "moto",
     "mypy>=0.981,<0.990",
     "avro==1.10.0",
-    "gcsfs>=0.4.0,<=2022.01.0",
+    "gcsfs",
     "urllib3>=1.25.4,<2",
     "psutil==5.9.0",
     "py>=1.11.0", # https://github.com/pytest-dev/pytest/issues/10420

0 commit comments

Comments
 (0)