Skip to content

Commit 6e8e88e

Browse files
authored
Merge branch 'main' into patch-1
2 parents d5dba3a + 48bd3e9 commit 6e8e88e

File tree

4 files changed

+28
-17
lines changed

4 files changed

+28
-17
lines changed

awswrangler/athena/_write_iceberg.py

+6
Original file line numberDiff line numberDiff line change
@@ -678,6 +678,7 @@ def delete_from_iceberg_table(
678678
workgroup: str = "primary",
679679
encryption: str | None = None,
680680
kms_key: str | None = None,
681+
dtype: dict[str, str] | None = None,
681682
boto3_session: boto3.Session | None = None,
682683
s3_additional_kwargs: dict[str, Any] | None = None,
683684
catalog_id: str | None = None,
@@ -713,6 +714,10 @@ def delete_from_iceberg_table(
713714
Valid values: [``None``, ``"SSE_S3"``, ``"SSE_KMS"``]. Notice: ``"CSE_KMS"`` is not supported.
714715
kms_key
715716
For SSE-KMS, this is the KMS key ARN or ID.
717+
dtype
718+
Dictionary of column names and the Athena/Glue types they should be cast to.
719+
Useful when you have columns with undetermined or mixed data types.
720+
(e.g. {'col name': 'bigint', 'col2 name': 'int'})
716721
boto3_session
717722
The default boto3 session will be used if **boto3_session** is ``None``.
718723
s3_additional_kwargs
@@ -774,6 +779,7 @@ def delete_from_iceberg_table(
774779
boto3_session=boto3_session,
775780
s3_additional_kwargs=s3_additional_kwargs,
776781
catalog_id=catalog_id,
782+
dtype=dtype,
777783
index=False,
778784
)
779785

poetry.lock

+16-16
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ jsonpath-ng = { version = "^1.5.3", optional = true }
6868
# Other
6969
openpyxl = { version = "^3.0.0", optional = true }
7070
progressbar2 = { version = "^4.0.0", optional = true }
71-
deltalake = { version = ">=0.18.0,<0.25.0", optional = true }
71+
deltalake = { version = ">=0.18.0,<0.26.0", optional = true }
7272
geopandas = { version = "^1.0.0", optional = true }
7373

7474
# Distributed

tests/unit/test_athena_iceberg.py

+5
Original file line numberDiff line numberDiff line change
@@ -870,6 +870,7 @@ def test_athena_delete_from_iceberg_table(
870870
"id": [1, 2, 3],
871871
"name": ["a", "b", "c"],
872872
"ts": [ts("2020-01-01 00:00:00.0"), ts("2020-01-02 00:00:01.0"), ts("2020-01-03 00:00:00.0")],
873+
"empty": [pd.NA, pd.NA, pd.NA],
873874
}
874875
)
875876
df["id"] = df["id"].astype("Int64") # Cast as nullable int64 type
@@ -883,6 +884,7 @@ def test_athena_delete_from_iceberg_table(
883884
temp_path=path2,
884885
partition_cols=partition_cols,
885886
keep_files=False,
887+
dtype={"empty": "string"},
886888
)
887889

888890
wr.athena.delete_from_iceberg_table(
@@ -892,6 +894,7 @@ def test_athena_delete_from_iceberg_table(
892894
temp_path=path2,
893895
merge_cols=["id"],
894896
keep_files=False,
897+
dtype={"empty": "string"},
895898
)
896899

897900
df_actual = wr.athena.read_sql_query(
@@ -906,10 +909,12 @@ def test_athena_delete_from_iceberg_table(
906909
"id": [3],
907910
"name": ["c"],
908911
"ts": [ts("2020-01-03 00:00:00.0")],
912+
"empty": [pd.NA],
909913
}
910914
)
911915
df_expected["id"] = df_expected["id"].astype("Int64") # Cast as nullable int64 type
912916
df_expected["name"] = df_expected["name"].astype("string")
917+
df_expected["empty"] = df_expected["empty"].astype("string")
913918

914919
assert_pandas_equals(df_expected, df_actual)
915920

0 commit comments

Comments
 (0)