Skip to content
2 changes: 1 addition & 1 deletion datafusion-examples/examples/data_io/parquet_encrypted.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ pub async fn parquet_encrypted() -> datafusion::common::Result<()> {
// Read encrypted parquet back as a DataFrame using matching decryption config
let ctx: SessionContext = SessionContext::new();
let read_options =
ParquetReadOptions::default().file_decryption_properties((&decrypt).into());
ParquetReadOptions::default().file_decryption_properties((&decrypt).try_into()?);

let encrypted_parquet_df = ctx
.read_parquet(tempfile.to_str().unwrap(), read_options)
Expand Down
61 changes: 53 additions & 8 deletions datafusion/common/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3024,8 +3024,18 @@ impl From<ConfigFileDecryptionProperties> for FileDecryptionProperties {
}
Comment on lines 3022 to 3053
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As a follow up, I wonder if the reverse should also be a TryFrom, since it's full of unwraps.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good, I will file an issue for this.


#[cfg(feature = "parquet_encryption")]
impl From<&Arc<FileDecryptionProperties>> for ConfigFileDecryptionProperties {
fn from(f: &Arc<FileDecryptionProperties>) -> Self {
impl TryFrom<&Arc<FileDecryptionProperties>> for ConfigFileDecryptionProperties {
Comment thread
adamreeve marked this conversation as resolved.
type Error = DataFusionError;

fn try_from(f: &Arc<FileDecryptionProperties>) -> Result<Self> {
let footer_key = f.footer_key(None).map_err(|e| {
Comment thread
adamreeve marked this conversation as resolved.
DataFusionError::Configuration(format!(
"Could not retrieve footer key from FileDecryptionProperties. \
Note that conversion to ConfigFileDecryptionProperties is not supported \
when using a key retriever: {e}"
))
})?;

let (column_names_vec, column_keys_vec) = f.column_keys();
let mut column_decryption_properties: HashMap<
String,
Expand All @@ -3039,14 +3049,12 @@ impl From<&Arc<FileDecryptionProperties>> for ConfigFileDecryptionProperties {
}

let aad_prefix = f.aad_prefix().cloned().unwrap_or_default();
ConfigFileDecryptionProperties {
footer_key_as_hex: hex::encode(
f.footer_key(None).unwrap_or_default().as_ref(),
),
Ok(ConfigFileDecryptionProperties {
footer_key_as_hex: hex::encode(footer_key.as_ref()),
column_decryption_properties,
aad_prefix_as_hex: hex::encode(aad_prefix),
footer_signature_verification: f.check_plaintext_footer_integrity(),
}
})
}
}

Expand Down Expand Up @@ -3519,7 +3527,8 @@ mod tests {
Arc::new(FileEncryptionProperties::from(config_encrypt.clone()));
assert_eq!(file_encryption_properties, encryption_properties_built);

let config_decrypt = ConfigFileDecryptionProperties::from(&decryption_properties);
let config_decrypt =
ConfigFileDecryptionProperties::try_from(&decryption_properties).unwrap();
let decryption_properties_built =
Arc::new(FileDecryptionProperties::from(config_decrypt.clone()));
assert_eq!(decryption_properties, decryption_properties_built);
Expand Down Expand Up @@ -3637,6 +3646,42 @@ mod tests {
assert_eq!(factory_options.get("key2"), Some(&"value 2".to_string()));
}

#[cfg(feature = "parquet_encryption")]
struct ParquetEncryptionKeyRetriever {}

#[cfg(feature = "parquet_encryption")]
impl parquet::encryption::decrypt::KeyRetriever for ParquetEncryptionKeyRetriever {
fn retrieve_key(&self, key_metadata: &[u8]) -> parquet::errors::Result<Vec<u8>> {
if !key_metadata.is_empty() {
Ok(b"1234567890123450".to_vec())
} else {
Err(parquet::errors::ParquetError::General(
"Key metadata not provided".to_string(),
))
}
}
}

#[cfg(feature = "parquet_encryption")]
#[test]
fn conversion_from_key_retriever_to_config_file_decryption_properties() {
use crate::Result;
use crate::config::ConfigFileDecryptionProperties;
use crate::encryption::FileDecryptionProperties;

let retriever = std::sync::Arc::new(ParquetEncryptionKeyRetriever {});
let decryption_properties =
FileDecryptionProperties::with_key_retriever(retriever)
.build()
.unwrap();
let config_file_decryption_properties: Result<ConfigFileDecryptionProperties> =
(&decryption_properties).try_into();
assert!(config_file_decryption_properties.is_err());
let err = config_file_decryption_properties.unwrap_err().to_string();
assert!(err.contains("key retriever"));
assert!(err.contains("Key metadata not provided"));
}

#[cfg(feature = "parquet")]
#[test]
fn parquet_table_options_config_entry() {
Expand Down
4 changes: 2 additions & 2 deletions datafusion/core/src/dataframe/parquet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -312,8 +312,8 @@ mod tests {

// Read encrypted parquet
let ctx: SessionContext = SessionContext::new();
let read_options =
ParquetReadOptions::default().file_decryption_properties((&decrypt).into());
let read_options = ParquetReadOptions::default()
.file_decryption_properties((&decrypt).try_into()?);

ctx.register_parquet("roundtrip_parquet", &tempfile_str, read_options.clone())
.await?;
Expand Down
4 changes: 2 additions & 2 deletions datafusion/core/tests/parquet/encryption.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,8 @@ async fn round_trip_encryption() {

// Read encrypted parquet
let ctx: SessionContext = SessionContext::new();
let options =
ParquetReadOptions::default().file_decryption_properties((&decrypt).into());
let options = ParquetReadOptions::default()
.file_decryption_properties((&decrypt).try_into().unwrap());

let encrypted_batches = read_parquet_test_data(
tempfile.into_os_string().into_string().unwrap(),
Expand Down
19 changes: 19 additions & 0 deletions docs/source/library-user-guide/upgrading/54.0.0.md
Original file line number Diff line number Diff line change
Expand Up @@ -450,3 +450,22 @@ clusters (e.g., ZWJ emoji sequences). For ASCII and most common Unicode text,
behavior is unchanged.

[#17861]: https://github.com/apache/datafusion/pull/17861

### Conversion from `FileDecryptionProperties` to `ConfigFileDecryptionProperties` is now fallible

Previously, `datafusion_common::config::ConfigFileDecryptionProperties`
Comment thread
adamreeve marked this conversation as resolved.
implemented `From<&Arc<parquet::encryption::decrypt::FileDecryptionProperties>>`.
If an error was encountered when retrieving the footer key without providing key metadata,
the error would be ignored and an empty footer key set in the result.
This could lead to obscure errors later.

`ConfigFileDecryptionProperties` now instead implements `TryFrom<&Arc<FileDecryptionProperties>>`,
and errors retrieving the footer key will be propagated up.

Code that uses `ConfigFileDecryptionProperties::from(&Arc<FileDecryptionProperties>)`
should be updated to use `try_from`,
and calls to `FileDecryptionProperties::into` should be changed to `try_into`,
with appropriate error handling added.

See [#21602](https://github.com/apache/datafusion/issues/21602) and
[PR #21603](https://github.com/apache/datafusion/pull/21603) for details.
Loading