From 64e9a603f8272c1e190e9884df0cc96c55ea174e Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Thu, 22 Jan 2026 09:31:13 -0800 Subject: [PATCH 1/5] add docs and refactor --- pyiceberg/catalog/hive.py | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/pyiceberg/catalog/hive.py b/pyiceberg/catalog/hive.py index 3046e25626..e161f0a81e 100644 --- a/pyiceberg/catalog/hive.py +++ b/pyiceberg/catalog/hive.py @@ -551,23 +551,30 @@ def commit_table( if hive_table and current_table: # Table exists, update it. - new_parameters = _construct_parameters( + + # Note on table properties: + # - Iceberg table properties are stored in both HMS and Iceberg metadata JSON. + # - Updates are reflected in both locations + # - Existing HMS table properties (set by external systems like Hive/Spark) are preserved. + # + # While it is possible to modify HMS table properties through this API, it is not recommended: + # - New/Updated HMS table properties will also be stored in Iceberg metadata (even though it's HMS-specific) + # - HMS properties cannot be deleted since they are not visible to Iceberg + # - Mixing HMS-specific properties in Iceberg metadata can cause confusion + new_iceberg_properties = _construct_parameters( metadata_location=updated_staged_table.metadata_location, previous_metadata_location=current_table.metadata_location, metadata_properties=updated_staged_table.properties, ) - # Detect properties that were removed from Iceberg metadata - removed_keys = current_table.properties.keys() - updated_staged_table.properties.keys() - - # Sync HMS parameters: Iceberg metadata is the source of truth, HMS parameters are - # a projection of Iceberg state plus any HMS-only properties. - # Start with existing HMS params, remove deleted Iceberg properties, then apply Iceberg values. - merged_params = dict(hive_table.parameters or {}) - for key in removed_keys: - merged_params.pop(key, None) - merged_params.update(new_parameters) - hive_table.parameters = merged_params + deleted_iceberg_properties = current_table.properties.keys() - updated_staged_table.properties.keys() + + # Merge: preserve HMS-only properties, remove deleted Iceberg properties, apply new Iceberg properties + existing_hms_parameters = dict(hive_table.parameters or {}) + for key in deleted_iceberg_properties: + existing_hms_parameters.pop(key, None) + existing_hms_parameters.update(new_iceberg_properties) + hive_table.parameters = existing_hms_parameters # Update hive's schema and properties hive_table.sd = _construct_hive_storage_descriptor( From c252fb78f52e531400f259a118acc7edac8d2787 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Fri, 23 Jan 2026 09:30:49 -0800 Subject: [PATCH 2/5] Update pyiceberg/catalog/hive.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- pyiceberg/catalog/hive.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyiceberg/catalog/hive.py b/pyiceberg/catalog/hive.py index e161f0a81e..2a52324d54 100644 --- a/pyiceberg/catalog/hive.py +++ b/pyiceberg/catalog/hive.py @@ -558,7 +558,7 @@ def commit_table( # - Existing HMS table properties (set by external systems like Hive/Spark) are preserved. # # While it is possible to modify HMS table properties through this API, it is not recommended: - # - New/Updated HMS table properties will also be stored in Iceberg metadata (even though it's HMS-specific) + # - New/Updated HMS table properties will also be stored in Iceberg metadata (even though it is HMS-specific) # - HMS properties cannot be deleted since they are not visible to Iceberg # - Mixing HMS-specific properties in Iceberg metadata can cause confusion new_iceberg_properties = _construct_parameters( From 6a928209bdd93f2d561c34e4f23ffc4241008558 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Fri, 23 Jan 2026 12:37:52 -0500 Subject: [PATCH 3/5] feedback --- pyiceberg/catalog/hive.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pyiceberg/catalog/hive.py b/pyiceberg/catalog/hive.py index 2a52324d54..1bec186ca8 100644 --- a/pyiceberg/catalog/hive.py +++ b/pyiceberg/catalog/hive.py @@ -558,9 +558,11 @@ def commit_table( # - Existing HMS table properties (set by external systems like Hive/Spark) are preserved. # # While it is possible to modify HMS table properties through this API, it is not recommended: - # - New/Updated HMS table properties will also be stored in Iceberg metadata (even though it is HMS-specific) - # - HMS properties cannot be deleted since they are not visible to Iceberg # - Mixing HMS-specific properties in Iceberg metadata can cause confusion + # - New/updated HMS table properties will also be stored in Iceberg metadata (even though it is HMS-specific) + # - HMS-native properties (set outside Iceberg) cannot be deleted since they are not visible to Iceberg + # (However, if you first SET an HMS property via Iceberg, it becomes tracked in Iceberg metadata, + # and can then be deleted via Iceberg - which removes it from both Iceberg metadata and HMS) new_iceberg_properties = _construct_parameters( metadata_location=updated_staged_table.metadata_location, previous_metadata_location=current_table.metadata_location, @@ -569,7 +571,7 @@ def commit_table( # Detect properties that were removed from Iceberg metadata deleted_iceberg_properties = current_table.properties.keys() - updated_staged_table.properties.keys() - # Merge: preserve HMS-only properties, remove deleted Iceberg properties, apply new Iceberg properties + # Merge: preserve HMS-native properties, remove deleted Iceberg properties, apply new Iceberg properties existing_hms_parameters = dict(hive_table.parameters or {}) for key in deleted_iceberg_properties: existing_hms_parameters.pop(key, None) From 2bbdfe750df5e7fa44244242458ae2b5614afd64 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Sat, 24 Jan 2026 15:38:44 -0500 Subject: [PATCH 4/5] add a test for new case --- tests/integration/test_reads.py | 53 +++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/tests/integration/test_reads.py b/tests/integration/test_reads.py index 8de1925eec..641cadc3e4 100644 --- a/tests/integration/test_reads.py +++ b/tests/integration/test_reads.py @@ -286,6 +286,59 @@ def test_hive_critical_properties_always_from_iceberg(catalog: Catalog) -> None: assert new_metadata_location != original_metadata_location, "metadata_location should be updated!" +@pytest.mark.integration +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive")]) +def test_hive_native_properties_cannot_be_deleted_via_iceberg(catalog: Catalog) -> None: + """Test that HMS-native properties (set outside Iceberg) cannot be deleted via Iceberg. + + HMS-native properties are not visible to Iceberg, so remove_properties has no effect on them. + However, if you first SET an HMS property via Iceberg (making it tracked in Iceberg metadata), + it can then be deleted via Iceberg. + """ + table = create_table(catalog) + hive_client: _HiveClient = _HiveClient(catalog.properties["uri"]) + + # Set an HMS-native property directly (not through Iceberg) + with hive_client as open_client: + hive_table = open_client.get_table(*TABLE_NAME) + hive_table.parameters["hms_native_prop"] = "native_value" + open_client.alter_table(TABLE_NAME[0], TABLE_NAME[1], hive_table) + + # Verify the HMS-native property exists + with hive_client as open_client: + hive_table = open_client.get_table(*TABLE_NAME) + assert hive_table.parameters.get("hms_native_prop") == "native_value" + + # Attempt to remove the HMS-native property via Iceberg - should have no effect + # because it's not tracked in Iceberg metadata (not visible to Iceberg) + table.transaction().remove_properties("hms_native_prop").commit_transaction() + + # HMS-native property should still exist (cannot be deleted via Iceberg) + with hive_client as open_client: + hive_table = open_client.get_table(*TABLE_NAME) + assert hive_table.parameters.get("hms_native_prop") == "native_value", ( + "HMS-native property should NOT be deletable via Iceberg since it's not visible to Iceberg!" + ) + + # Now SET the same property via Iceberg (this makes it tracked in Iceberg metadata) + table.transaction().set_properties({"hms_native_prop": "iceberg_value"}).commit_transaction() + + # Verify it's updated + with hive_client as open_client: + hive_table = open_client.get_table(*TABLE_NAME) + assert hive_table.parameters.get("hms_native_prop") == "iceberg_value" + + # Now we CAN delete it via Iceberg (because it's now tracked in Iceberg metadata) + table.transaction().remove_properties("hms_native_prop").commit_transaction() + + # Property should be deleted from HMS + with hive_client as open_client: + hive_table = open_client.get_table(*TABLE_NAME) + assert hive_table.parameters.get("hms_native_prop") is None, ( + "Property should be deletable after being SET via Iceberg (making it tracked)!" + ) + + @pytest.mark.integration @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_table_properties_dict(catalog: Catalog) -> None: From 3b4a9691b6cfad967a369888294040f9d8b5ed19 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Sat, 24 Jan 2026 15:57:23 -0500 Subject: [PATCH 5/5] fix test --- tests/integration/test_reads.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/tests/integration/test_reads.py b/tests/integration/test_reads.py index 641cadc3e4..8811330a8a 100644 --- a/tests/integration/test_reads.py +++ b/tests/integration/test_reads.py @@ -291,7 +291,7 @@ def test_hive_critical_properties_always_from_iceberg(catalog: Catalog) -> None: def test_hive_native_properties_cannot_be_deleted_via_iceberg(catalog: Catalog) -> None: """Test that HMS-native properties (set outside Iceberg) cannot be deleted via Iceberg. - HMS-native properties are not visible to Iceberg, so remove_properties has no effect on them. + HMS-native properties are not visible to Iceberg, so remove_properties fails with KeyError. However, if you first SET an HMS property via Iceberg (making it tracked in Iceberg metadata), it can then be deleted via Iceberg. """ @@ -304,26 +304,33 @@ def test_hive_native_properties_cannot_be_deleted_via_iceberg(catalog: Catalog) hive_table.parameters["hms_native_prop"] = "native_value" open_client.alter_table(TABLE_NAME[0], TABLE_NAME[1], hive_table) - # Verify the HMS-native property exists + # Verify the HMS-native property exists in HMS with hive_client as open_client: hive_table = open_client.get_table(*TABLE_NAME) assert hive_table.parameters.get("hms_native_prop") == "native_value" - # Attempt to remove the HMS-native property via Iceberg - should have no effect - # because it's not tracked in Iceberg metadata (not visible to Iceberg) - table.transaction().remove_properties("hms_native_prop").commit_transaction() + # Refresh the Iceberg table to get the latest state + table.refresh() + + # Verify the HMS-native property is NOT visible in Iceberg + assert "hms_native_prop" not in table.properties + + # Attempt to remove the HMS-native property via Iceberg - this should fail + # because the property is not tracked in Iceberg metadata (not visible to Iceberg) + with pytest.raises(KeyError): + table.transaction().remove_properties("hms_native_prop").commit_transaction() # HMS-native property should still exist (cannot be deleted via Iceberg) with hive_client as open_client: hive_table = open_client.get_table(*TABLE_NAME) assert hive_table.parameters.get("hms_native_prop") == "native_value", ( - "HMS-native property should NOT be deletable via Iceberg since it's not visible to Iceberg!" + "HMS-native property should still exist since Iceberg removal failed!" ) # Now SET the same property via Iceberg (this makes it tracked in Iceberg metadata) table.transaction().set_properties({"hms_native_prop": "iceberg_value"}).commit_transaction() - # Verify it's updated + # Verify it's updated in both places with hive_client as open_client: hive_table = open_client.get_table(*TABLE_NAME) assert hive_table.parameters.get("hms_native_prop") == "iceberg_value"