
Commit 23c6c86

fix: Retire pytz library (feast-dev#4406)

* fix: Remove pytz.
* fix: Keep the pytz.UTC part in dask.py

Signed-off-by: Shuchu Han <shuchu.han@gmail.com>

1 parent cebbe04 commit 23c6c86

File tree: 29 files changed (+109, -133 lines)

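Nearly every file below applies the same substitution: pytz.utc becomes datetime.timezone.utc, pytz.FixedOffset(n) becomes a fixed-offset datetime.timezone, and named zones come from the standard-library zoneinfo module (dask.py keeps two pytz.UTC comparisons; see that section). A minimal sketch of the mapping, with illustrative variable names and values only:

```python
from datetime import datetime, timedelta, timezone
from zoneinfo import ZoneInfo  # stdlib since Python 3.9; may need the tzdata package where no system zone database exists

ts = datetime(2024, 1, 1, 12, 0, 0)

# pytz.utc                    -> timezone.utc
aware_utc = ts.replace(tzinfo=timezone.utc)

# pytz.FixedOffset(60)        -> timezone(timedelta(minutes=60))
plus_one = aware_utc.astimezone(timezone(timedelta(minutes=60)))

# pytz.timezone("US/Pacific") -> ZoneInfo("US/Pacific")
pacific = aware_utc.astimezone(ZoneInfo("US/Pacific"))
```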

sdk/python/feast/driver_test_data.py (+9 -4)

@@ -1,10 +1,11 @@
 # This module generates dummy data to be used for tests and examples.
 import itertools
+from datetime import timedelta, timezone
 from enum import Enum
 
 import numpy as np
 import pandas as pd
-from pytz import FixedOffset, timezone, utc
+from zoneinfo import ZoneInfo
 
 from feast.infra.offline_stores.offline_utils import (
     DEFAULT_ENTITY_DF_EVENT_TIMESTAMP_COL,
@@ -22,11 +23,15 @@ def _convert_event_timestamp(event_timestamp: pd.Timestamp, t: EventTimestampTyp
     if t == EventTimestampType.TZ_NAIVE:
         return event_timestamp
     elif t == EventTimestampType.TZ_AWARE_UTC:
-        return event_timestamp.replace(tzinfo=utc)
+        return event_timestamp.replace(tzinfo=timezone.utc)
     elif t == EventTimestampType.TZ_AWARE_FIXED_OFFSET:
-        return event_timestamp.replace(tzinfo=utc).astimezone(FixedOffset(60))
+        return event_timestamp.replace(tzinfo=timezone.utc).astimezone(
+            tz=timezone(timedelta(minutes=60))
+        )
     elif t == EventTimestampType.TZ_AWARE_US_PACIFIC:
-        return event_timestamp.replace(tzinfo=utc).astimezone(timezone("US/Pacific"))
+        return event_timestamp.replace(tzinfo=timezone.utc).astimezone(
+            tz=ZoneInfo("US/Pacific")
+        )
 
 
 def create_orders_df(

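_convert_event_timestamp receives pd.Timestamp values, and pandas accepts standard-library tzinfo objects directly, so no pytz-specific localize step is needed. A short usage sketch mirroring the three branches above (the timestamp value is illustrative):

```python
import pandas as pd
from datetime import timedelta, timezone
from zoneinfo import ZoneInfo

ts = pd.Timestamp("2024-01-01 12:00:00")                       # tz-naive input

utc_ts = ts.replace(tzinfo=timezone.utc)                       # TZ_AWARE_UTC: stamp UTC, keep the wall clock
fixed = utc_ts.astimezone(tz=timezone(timedelta(minutes=60)))  # TZ_AWARE_FIXED_OFFSET: 13:00+01:00
pacific = utc_ts.astimezone(tz=ZoneInfo("US/Pacific"))         # TZ_AWARE_US_PACIFIC: 04:00-08:00
```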
sdk/python/feast/embedded_go/type_map.py (+2 -2)

@@ -1,12 +1,12 @@
+from datetime import timezone
 from typing import List
 
 import pyarrow as pa
-import pytz
 
 from feast.protos.feast.types import Value_pb2
 from feast.types import Array, PrimitiveFeastType
 
-PA_TIMESTAMP_TYPE = pa.timestamp("s", tz=pytz.UTC)
+PA_TIMESTAMP_TYPE = pa.timestamp("s", tz=timezone.utc)
 
 ARROW_TYPE_TO_PROTO_FIELD = {
     pa.int32(): "int32_val",

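pa.timestamp() also accepts a tzinfo object and normalizes it to a zone name, so the type built from timezone.utc is expected to compare equal to the old pytz-based definition. A quick check, assuming pyarrow renders both as the "UTC" zone string:

```python
from datetime import datetime, timezone

import pyarrow as pa

PA_TIMESTAMP_TYPE = pa.timestamp("s", tz=timezone.utc)
print(PA_TIMESTAMP_TYPE)  # timestamp[s, tz=UTC]

# Arrays are built from tz-aware datetimes exactly as before.
arr = pa.array([datetime(2024, 1, 1, tzinfo=timezone.utc)], type=PA_TIMESTAMP_TYPE)
```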
sdk/python/feast/feature_logging.py (+2 -2)

@@ -1,8 +1,8 @@
 import abc
+from datetime import timezone
 from typing import TYPE_CHECKING, Dict, Optional, Type, cast
 
 import pyarrow as pa
-from pytz import UTC
 
 from feast.data_source import DataSource
 from feast.embedded_go.type_map import FEAST_TYPE_TO_ARROW_TYPE, PA_TIMESTAMP_TYPE
@@ -97,7 +97,7 @@ def get_schema(self, registry: "BaseRegistry") -> pa.Schema:
         )
 
         # system columns
-        fields[LOG_TIMESTAMP_FIELD] = pa.timestamp("us", tz=UTC)
+        fields[LOG_TIMESTAMP_FIELD] = pa.timestamp("us", tz=timezone.utc)
         fields[LOG_DATE_FIELD] = pa.date32()
         fields[REQUEST_ID_FIELD] = pa.string()

sdk/python/feast/infra/materialization/snowflake_engine.py (+5 -3)

@@ -1,14 +1,13 @@
 import os
 import shutil
 from dataclasses import dataclass
-from datetime import datetime
+from datetime import datetime, timezone
 from typing import Callable, List, Literal, Optional, Sequence, Union
 
 import click
 import pandas as pd
 from colorama import Fore, Style
 from pydantic import ConfigDict, Field, StrictStr
-from pytz import utc
 from tqdm import tqdm
 
 import feast
@@ -276,7 +275,10 @@ def _materialize_one(
             execute_snowflake_statement(conn, query).fetchall()[0][0]
             / 1_000_000_000
         )
-        if last_commit_change_time < start_date.astimezone(tz=utc).timestamp():
+        if (
+            last_commit_change_time
+            < start_date.astimezone(tz=timezone.utc).timestamp()
+        ):
             return SnowflakeMaterializationJob(
                 job_id=job_id, status=MaterializationJobStatus.SUCCEEDED
             )

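For a tz-aware start_date, datetime.timestamp() already returns POSIX seconds regardless of the zone, so converting to UTC first does not change the comparison above; it simply makes the intent explicit (for naive inputs both forms fall back to the local timezone). A small illustration with hypothetical values:

```python
from datetime import datetime, timedelta, timezone

start_date = datetime(2024, 1, 1, 9, 0, tzinfo=timezone(timedelta(hours=1)))  # 08:00 UTC

# Same instant, same epoch seconds, with or without the explicit conversion.
assert start_date.astimezone(tz=timezone.utc).timestamp() == start_date.timestamp()
```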
sdk/python/feast/infra/offline_stores/contrib/athena_offline_store/athena.py (+4 -5)

@@ -1,6 +1,6 @@
 import contextlib
 import uuid
-from datetime import datetime
+from datetime import datetime, timezone
 from pathlib import Path
 from typing import (
     Callable,
@@ -19,7 +19,6 @@
 import pyarrow
 import pyarrow as pa
 from pydantic import StrictStr
-from pytz import utc
 
 from feast import OnDemandFeatureView
 from feast.data_source import DataSource
@@ -100,8 +99,8 @@ def pull_latest_from_table_or_query(
         athena_client = aws_utils.get_athena_data_client(config.offline_store.region)
         s3_resource = aws_utils.get_s3_resource(config.offline_store.region)
 
-        start_date = start_date.astimezone(tz=utc)
-        end_date = end_date.astimezone(tz=utc)
+        start_date = start_date.astimezone(tz=timezone.utc)
+        end_date = end_date.astimezone(tz=timezone.utc)
 
         query = f"""
             SELECT
@@ -151,7 +150,7 @@ def pull_all_from_table_or_query(
         query = f"""
             SELECT {field_string}
             FROM {from_expression}
-            WHERE {timestamp_field} BETWEEN TIMESTAMP '{start_date.astimezone(tz=utc).strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]}' AND TIMESTAMP '{end_date.astimezone(tz=utc).strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]}'
+            WHERE {timestamp_field} BETWEEN TIMESTAMP '{start_date.astimezone(tz=timezone.utc).strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]}' AND TIMESTAMP '{end_date.astimezone(tz=timezone.utc).strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]}'
             {"AND "+date_partition_column+" >= '"+start_date.strftime('%Y-%m-%d')+"' AND "+date_partition_column+" <= '"+end_date.strftime('%Y-%m-%d')+"' " if date_partition_column != "" and date_partition_column is not None else ''}
         """

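The BETWEEN predicate embeds its bounds as SQL literals, so each datetime is converted to UTC and the %f microseconds are trimmed to milliseconds with [:-3]. The inline f-string expression is equivalent to this hypothetical helper:

```python
from datetime import datetime, timezone

def to_timestamp_literal(dt: datetime) -> str:
    # Hypothetical helper: UTC wall time, millisecond precision, no offset suffix.
    return dt.astimezone(tz=timezone.utc).strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]

print(to_timestamp_literal(datetime(2024, 1, 1, 12, 30, 45, 123456, tzinfo=timezone.utc)))
# 2024-01-01 12:30:45.123
```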
sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/postgres.py (+3 -4)

@@ -1,6 +1,6 @@
 import contextlib
 from dataclasses import asdict
-from datetime import datetime
+from datetime import datetime, timezone
 from typing import (
     Any,
     Callable,
@@ -20,7 +20,6 @@
 import pyarrow as pa
 from jinja2 import BaseLoader, Environment
 from psycopg import sql
-from pytz import utc
 
 from feast.data_source import DataSource
 from feast.errors import InvalidEntityType, ZeroColumnQueryResult, ZeroRowsQueryResult
@@ -214,8 +213,8 @@ def pull_all_from_table_or_query(
             join_key_columns + feature_name_columns + [timestamp_field]
         )
 
-        start_date = start_date.astimezone(tz=utc)
-        end_date = end_date.astimezone(tz=utc)
+        start_date = start_date.astimezone(tz=timezone.utc)
+        end_date = end_date.astimezone(tz=timezone.utc)
 
         query = f"""
             SELECT {field_string}

sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark.py (+4 -8)

@@ -2,7 +2,7 @@
 import tempfile
 import uuid
 import warnings
-from datetime import datetime
+from datetime import datetime, timezone
 from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 
 import numpy as np
@@ -14,7 +14,6 @@
 from pydantic import StrictStr
 from pyspark import SparkConf
 from pyspark.sql import SparkSession
-from pytz import utc
 
 from feast import FeatureView, OnDemandFeatureView
 from feast.data_source import DataSource
@@ -284,8 +283,8 @@ def pull_all_from_table_or_query(
 
         fields = ", ".join(join_key_columns + feature_name_columns + [timestamp_field])
         from_expression = data_source.get_table_query_string()
-        start_date = start_date.astimezone(tz=utc)
-        end_date = end_date.astimezone(tz=utc)
+        start_date = start_date.astimezone(tz=timezone.utc)
+        end_date = end_date.astimezone(tz=timezone.utc)
 
         query = f"""
         SELECT {fields}
@@ -520,21 +519,18 @@ def _upload_entity_df(
            entity_df[event_timestamp_col], utc=True
        )
        spark_session.createDataFrame(entity_df).createOrReplaceTempView(table_name)
-       return
    elif isinstance(entity_df, str):
        spark_session.sql(entity_df).createOrReplaceTempView(table_name)
-       return
    elif isinstance(entity_df, pyspark.sql.DataFrame):
        entity_df.createOrReplaceTempView(table_name)
-       return
    else:
        raise InvalidEntityType(type(entity_df))
 
 
 def _format_datetime(t: datetime) -> str:
    # Since Hive does not support timezone, need to transform to utc.
    if t.tzinfo:
-       t = t.astimezone(tz=utc)
+       t = t.astimezone(tz=timezone.utc)
    dt = t.strftime("%Y-%m-%d %H:%M:%S.%f")
    return dt

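As the comment in _format_datetime notes, Hive timestamps carry no zone, so aware values are shifted to UTC while naive values are formatted as-is (implicitly assumed to already be UTC). A usage sketch with the helper re-declared so the snippet runs standalone (inputs are illustrative):

```python
from datetime import datetime, timedelta, timezone

def _format_datetime(t: datetime) -> str:
    # Mirrors the updated helper above.
    if t.tzinfo:
        t = t.astimezone(tz=timezone.utc)
    return t.strftime("%Y-%m-%d %H:%M:%S.%f")

print(_format_datetime(datetime(2024, 1, 1, 13, 0, tzinfo=timezone(timedelta(hours=1)))))  # 2024-01-01 12:00:00.000000
print(_format_datetime(datetime(2024, 1, 1, 13, 0)))                                       # 2024-01-01 13:00:00.000000
```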
sdk/python/feast/infra/offline_stores/contrib/trino_offline_store/connectors/upload.py (+2 -3)

@@ -18,13 +18,12 @@
 """
 
-from datetime import datetime
+from datetime import datetime, timezone
 from typing import Any, Dict, Iterator, Optional, Set
 
 import numpy as np
 import pandas as pd
 import pyarrow
-from pytz import utc
 
 from feast.infra.offline_stores.contrib.trino_offline_store.trino_queries import Trino
 from feast.infra.offline_stores.contrib.trino_offline_store.trino_type_map import (
@@ -141,7 +140,7 @@ def _format_value(row: pd.Series, schema: Dict[str, Any]) -> str:
 
 def format_datetime(t: datetime) -> str:
     if t.tzinfo:
-        t = t.astimezone(tz=utc)
+        t = t.astimezone(tz=timezone.utc)
     return t.strftime("%Y-%m-%d %H:%M:%S.%f")

sdk/python/feast/infra/offline_stores/dask.py (+8 -4)

@@ -1,6 +1,6 @@
 import os
 import uuid
-from datetime import datetime
+from datetime import datetime, timezone
 from pathlib import Path
 from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union
 
@@ -178,6 +178,8 @@ def evaluate_historical_retrieval():
         entity_df_event_timestamp_col_type = entity_df_with_features.dtypes[
             entity_df_event_timestamp_col
         ]
+
+        # TODO: need to figure out why the value of entity_df_event_timestamp_col_type.tz is pytz.UTC
         if (
             not hasattr(entity_df_event_timestamp_col_type, "tz")
             or entity_df_event_timestamp_col_type.tz != pytz.UTC
@@ -189,7 +191,7 @@ def evaluate_historical_retrieval():
             ].apply(
                 lambda x: x
                 if x.tzinfo is not None
-                else x.replace(tzinfo=pytz.utc)
+                else x.replace(tzinfo=timezone.utc)
             )
         )
 
@@ -616,6 +618,7 @@ def _normalize_timestamp(
     if created_timestamp_column:
         created_timestamp_column_type = df_to_join_types[created_timestamp_column]
 
+    # TODO: need to figure out why the value of timestamp_field_type.tz is pytz.UTC
    if not hasattr(timestamp_field_type, "tz") or timestamp_field_type.tz != pytz.UTC:
        # if you are querying for the event timestamp field, we have to deduplicate
        if len(df_to_join[timestamp_field].shape) > 1:
@@ -624,10 +627,11 @@ def _normalize_timestamp(
 
        # Make sure all timestamp fields are tz-aware. We default tz-naive fields to UTC
        df_to_join[timestamp_field] = df_to_join[timestamp_field].apply(
-           lambda x: x if x.tzinfo is not None else x.replace(tzinfo=pytz.utc),
+           lambda x: x if x.tzinfo else x.replace(tzinfo=timezone.utc),
            meta=(timestamp_field, "datetime64[ns, UTC]"),
        )
 
+   # TODO: need to figure out why the value of created_timestamp_column_type.tz is pytz.UTC
    if created_timestamp_column and (
        not hasattr(created_timestamp_column_type, "tz")
        or created_timestamp_column_type.tz != pytz.UTC
@@ -640,7 +644,7 @@ def _normalize_timestamp(
        df_to_join[created_timestamp_column] = df_to_join[
            created_timestamp_column
        ].apply(
-           lambda x: x if x.tzinfo is not None else x.replace(tzinfo=pytz.utc),
+           lambda x: x if x.tzinfo else x.replace(tzinfo=timezone.utc),
            meta=(timestamp_field, "datetime64[ns, UTC]"),
        )

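dask.py is the one file where pytz survives on purpose (see the commit message and the TODO notes above): the tz attribute of a pandas datetime64[ns, UTC] dtype can still be a pytz object, depending on the pandas version, so the dtype comparisons keep pytz.UTC. A small, hedged way to inspect what a given pandas build returns (illustrative only, not what the code above does):

```python
import pandas as pd

s = pd.to_datetime(pd.Series(["2024-01-01 00:00:00"]), utc=True)
dtype = s.dtype  # datetime64[ns, UTC]

# dtype.tz may be pytz.UTC or datetime.timezone.utc depending on the pandas version;
# comparing its string form sidesteps the library-specific object.
print(repr(dtype.tz), str(dtype.tz) == "UTC")
```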
sdk/python/feast/infra/offline_stores/ibis.py (+5 -6)

@@ -1,7 +1,7 @@
 import random
 import string
 import uuid
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, timezone
 from pathlib import Path
 from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 
@@ -12,7 +12,6 @@
 import pyarrow
 from ibis.expr import datatypes as dt
 from ibis.expr.types import Table
-from pytz import utc
 
 from feast.data_source import DataSource
 from feast.feature_logging import LoggingConfig, LoggingSource
@@ -55,8 +54,8 @@ def pull_latest_from_table_or_query_ibis(
     fields = join_key_columns + feature_name_columns + [timestamp_field]
     if created_timestamp_column:
         fields.append(created_timestamp_column)
-    start_date = start_date.astimezone(tz=utc)
-    end_date = end_date.astimezone(tz=utc)
+    start_date = start_date.astimezone(tz=timezone.utc)
+    end_date = end_date.astimezone(tz=timezone.utc)
 
     table = data_source_reader(data_source)
 
@@ -265,8 +264,8 @@ def pull_all_from_table_or_query_ibis(
     staging_location_endpoint_override: Optional[str] = None,
 ) -> RetrievalJob:
     fields = join_key_columns + feature_name_columns + [timestamp_field]
-    start_date = start_date.astimezone(tz=utc)
-    end_date = end_date.astimezone(tz=utc)
+    start_date = start_date.astimezone(tz=timezone.utc)
+    end_date = end_date.astimezone(tz=timezone.utc)
 
     table = data_source_reader(data_source)

sdk/python/feast/infra/offline_stores/redshift.py (+5 -6)

@@ -1,6 +1,6 @@
 import contextlib
 import uuid
-from datetime import datetime
+from datetime import datetime, timezone
 from pathlib import Path
 from typing import (
     Any,
@@ -21,7 +21,6 @@
 import pyarrow as pa
 from dateutil import parser
 from pydantic import StrictStr, model_validator
-from pytz import utc
 
 from feast import OnDemandFeatureView, RedshiftSource
 from feast.data_source import DataSource
@@ -127,8 +126,8 @@ def pull_latest_from_table_or_query(
         )
         s3_resource = aws_utils.get_s3_resource(config.offline_store.region)
 
-        start_date = start_date.astimezone(tz=utc)
-        end_date = end_date.astimezone(tz=utc)
+        start_date = start_date.astimezone(tz=timezone.utc)
+        end_date = end_date.astimezone(tz=timezone.utc)
 
         query = f"""
             SELECT
@@ -174,8 +173,8 @@ def pull_all_from_table_or_query(
         )
         s3_resource = aws_utils.get_s3_resource(config.offline_store.region)
 
-        start_date = start_date.astimezone(tz=utc)
-        end_date = end_date.astimezone(tz=utc)
+        start_date = start_date.astimezone(tz=timezone.utc)
+        end_date = end_date.astimezone(tz=timezone.utc)
 
         query = f"""
             SELECT {field_string}
