Skip to content

Commit 3bcb8c7

Browse files
committed
Removed non string value types
1 parent deec858 commit 3bcb8c7

2 files changed

Lines changed: 7 additions & 171 deletions

File tree

datafusion/physical-plan/src/aggregates/group_values/mod.rs

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -272,13 +272,5 @@ fn supported_single_dictionary_value(t: &DataType) -> bool {
272272
| DataType::LargeBinary
273273
| DataType::Utf8View
274274
| DataType::BinaryView
275-
| DataType::Int8
276-
| DataType::Int16
277-
| DataType::Int32
278-
| DataType::Int64
279-
| DataType::UInt8
280-
| DataType::UInt16
281-
| DataType::UInt32
282-
| DataType::UInt64
283275
)
284276
}

datafusion/physical-plan/src/aggregates/group_values/single_group_by/dictionary.rs

Lines changed: 7 additions & 163 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,12 @@
1818
use crate::aggregates::group_values::GroupValues;
1919
use crate::hash_utils::RandomState;
2020
use arrow::array::{
21-
Array, ArrayRef, AsArray, BinaryArray, BinaryBuilder, BinaryViewArray,
22-
BinaryViewBuilder, DictionaryArray, LargeBinaryArray, LargeBinaryBuilder,
23-
LargeStringArray, LargeStringBuilder, PrimitiveArray, PrimitiveBuilder, StringArray,
24-
StringBuilder, StringViewArray, StringViewBuilder, UInt64Array,
25-
};
26-
use arrow::datatypes::{
27-
ArrowDictionaryKeyType, ArrowNativeType, DataType, Int8Type, Int16Type, Int32Type,
28-
Int64Type, UInt8Type, UInt16Type, UInt32Type, UInt64Type,
21+
Array, ArrayRef, BinaryArray, BinaryBuilder, BinaryViewArray, BinaryViewBuilder,
22+
DictionaryArray, LargeBinaryArray, LargeBinaryBuilder, LargeStringArray,
23+
LargeStringBuilder, PrimitiveArray, PrimitiveBuilder, StringArray, StringBuilder,
24+
StringViewArray, StringViewBuilder, UInt64Array,
2925
};
26+
use arrow::datatypes::{ArrowDictionaryKeyType, ArrowNativeType, DataType};
3027
use datafusion_common::Result;
3128
use datafusion_common::hash_utils::create_hashes;
3229
use datafusion_expr::EmitTo;
@@ -132,46 +129,6 @@ impl<K: ArrowDictionaryKeyType + Send> GroupValuesDictionary<K> {
132129
.downcast_ref::<BinaryViewArray>()
133130
.expect("Expected BinaryViewArray")
134131
.value(index),
135-
DataType::Int8 => {
136-
let arr = values.as_primitive::<Int8Type>();
137-
let val = arr.value(index);
138-
unsafe { std::slice::from_raw_parts(&val as *const i8 as *const u8, 1) }
139-
}
140-
DataType::Int16 => {
141-
let arr = values.as_primitive::<Int16Type>();
142-
let val = arr.value(index);
143-
unsafe { std::slice::from_raw_parts(&val as *const i16 as *const u8, 2) }
144-
}
145-
DataType::Int32 => {
146-
let arr = values.as_primitive::<Int32Type>();
147-
let val = arr.value(index);
148-
unsafe { std::slice::from_raw_parts(&val as *const i32 as *const u8, 4) }
149-
}
150-
DataType::Int64 => {
151-
let arr = values.as_primitive::<Int64Type>();
152-
let val = arr.value(index);
153-
unsafe { std::slice::from_raw_parts(&val as *const i64 as *const u8, 8) }
154-
}
155-
DataType::UInt8 => {
156-
let arr = values.as_primitive::<UInt8Type>();
157-
let val = arr.value(index);
158-
unsafe { std::slice::from_raw_parts(&val as *const u8, 1) }
159-
}
160-
DataType::UInt16 => {
161-
let arr = values.as_primitive::<UInt16Type>();
162-
let val = arr.value(index);
163-
unsafe { std::slice::from_raw_parts(&val as *const u16 as *const u8, 2) }
164-
}
165-
DataType::UInt32 => {
166-
let arr = values.as_primitive::<UInt32Type>();
167-
let val = arr.value(index);
168-
unsafe { std::slice::from_raw_parts(&val as *const u32 as *const u8, 4) }
169-
}
170-
DataType::UInt64 => {
171-
let arr = values.as_primitive::<UInt64Type>();
172-
let val = arr.value(index);
173-
unsafe { std::slice::from_raw_parts(&val as *const u64 as *const u8, 8) }
174-
}
175132
other => unimplemented!("get_raw_bytes not implemented for {other:?}"),
176133
}
177134
}
@@ -188,15 +145,6 @@ impl<K: ArrowDictionaryKeyType + Send> GroupValuesDictionary<K> {
188145
}
189146
// for primitives use a byte sequence that is a different length than the native type
190147
// a real i8 is always exactly 1 byte so 2 bytes can never match a real value
191-
DataType::Int8 | DataType::UInt8 => vec![0xFF, 0xFF],
192-
// a real i16/u16 is always exactly 2 bytes so 3 bytes can never match
193-
DataType::Int16 | DataType::UInt16 => vec![0xFF, 0xFF, 0xFF],
194-
// a real i32/u32/f32 is always exactly 4 bytes so 5 bytes can never match
195-
DataType::Int32 | DataType::UInt32 => vec![0xFF, 0xFF, 0xFF, 0xFF, 0xFF],
196-
// a real i64/u64/f64 is always exactly 8 bytes so 9 bytes can never match
197-
DataType::Int64 | DataType::UInt64 => {
198-
vec![0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF]
199-
}
200148
other => unimplemented!("sentinel_repr not implemented for {other:?}"),
201149
}
202150
}
@@ -307,110 +255,6 @@ impl<K: ArrowDictionaryKeyType + Send> GroupValuesDictionary<K> {
307255
}
308256
Ok(Arc::new(builder.finish()) as ArrayRef)
309257
}
310-
DataType::Int8 => {
311-
let mut builder = PrimitiveBuilder::<Int8Type>::new();
312-
for raw_bytes in raw {
313-
if raw_bytes == &sentinel {
314-
builder.append_null();
315-
} else {
316-
builder.append_value(i8::from_ne_bytes(
317-
raw_bytes.as_slice().try_into().unwrap(),
318-
));
319-
}
320-
}
321-
Ok(Arc::new(builder.finish()) as ArrayRef)
322-
}
323-
DataType::Int16 => {
324-
let mut builder = PrimitiveBuilder::<Int16Type>::new();
325-
for raw_bytes in raw {
326-
if raw_bytes == &sentinel {
327-
builder.append_null();
328-
} else {
329-
builder.append_value(i16::from_ne_bytes(
330-
raw_bytes.as_slice().try_into().unwrap(),
331-
));
332-
}
333-
}
334-
Ok(Arc::new(builder.finish()) as ArrayRef)
335-
}
336-
DataType::Int32 => {
337-
let mut builder = PrimitiveBuilder::<Int32Type>::new();
338-
for raw_bytes in raw {
339-
if raw_bytes == &sentinel {
340-
builder.append_null();
341-
} else {
342-
builder.append_value(i32::from_ne_bytes(
343-
raw_bytes.as_slice().try_into().unwrap(),
344-
));
345-
}
346-
}
347-
Ok(Arc::new(builder.finish()) as ArrayRef)
348-
}
349-
DataType::Int64 => {
350-
let mut builder = PrimitiveBuilder::<Int64Type>::new();
351-
for raw_bytes in raw {
352-
if raw_bytes == &sentinel {
353-
builder.append_null();
354-
} else {
355-
builder.append_value(i64::from_ne_bytes(
356-
raw_bytes.as_slice().try_into().unwrap(),
357-
));
358-
}
359-
}
360-
Ok(Arc::new(builder.finish()) as ArrayRef)
361-
}
362-
DataType::UInt8 => {
363-
let mut builder = PrimitiveBuilder::<UInt8Type>::new();
364-
for raw_bytes in raw {
365-
if raw_bytes == &sentinel {
366-
builder.append_null();
367-
} else {
368-
builder.append_value(u8::from_ne_bytes(
369-
raw_bytes.as_slice().try_into().unwrap(),
370-
));
371-
}
372-
}
373-
Ok(Arc::new(builder.finish()) as ArrayRef)
374-
}
375-
DataType::UInt16 => {
376-
let mut builder = PrimitiveBuilder::<UInt16Type>::new();
377-
for raw_bytes in raw {
378-
if raw_bytes == &sentinel {
379-
builder.append_null();
380-
} else {
381-
builder.append_value(u16::from_ne_bytes(
382-
raw_bytes.as_slice().try_into().unwrap(),
383-
));
384-
}
385-
}
386-
Ok(Arc::new(builder.finish()) as ArrayRef)
387-
}
388-
DataType::UInt32 => {
389-
let mut builder = PrimitiveBuilder::<UInt32Type>::new();
390-
for raw_bytes in raw {
391-
if raw_bytes == &sentinel {
392-
builder.append_null();
393-
} else {
394-
builder.append_value(u32::from_ne_bytes(
395-
raw_bytes.as_slice().try_into().unwrap(),
396-
));
397-
}
398-
}
399-
Ok(Arc::new(builder.finish()) as ArrayRef)
400-
}
401-
DataType::UInt64 => {
402-
let mut builder = PrimitiveBuilder::<UInt64Type>::new();
403-
for raw_bytes in raw {
404-
if raw_bytes == &sentinel {
405-
builder.append_null();
406-
} else {
407-
builder.append_value(u64::from_ne_bytes(
408-
raw_bytes.as_slice().try_into().unwrap(),
409-
));
410-
}
411-
}
412-
Ok(Arc::new(builder.finish()) as ArrayRef)
413-
}
414258
other => Err(datafusion_common::DataFusionError::NotImplemented(format!(
415259
"transform_into_array not implemented for {other:?}"
416260
))),
@@ -638,7 +482,8 @@ impl<K: ArrowDictionaryKeyType + Send> GroupValues for GroupValuesDictionary<K>
638482
#[cfg(test)]
639483
mod group_values_trait_test {
640484
use super::*;
641-
use arrow::array::{DictionaryArray, StringArray, UInt8Array};
485+
use arrow::array::{DictionaryArray, Int32Array, StringArray, UInt8Array};
486+
use arrow::datatypes::{Int32Type, UInt8Type};
642487
use std::sync::Arc;
643488

644489
fn create_dict_array(keys: Vec<u8>, values: Vec<&str>) -> ArrayRef {
@@ -1274,7 +1119,6 @@ mod group_values_trait_test {
12741119

12751120
mod data_correctness {
12761121
use super::*;
1277-
use arrow::array::Int32Array;
12781122

12791123
#[test]
12801124
fn test_group_assignment_order() {

0 commit comments

Comments
 (0)