Skip to content

Commit 21248fb

Browse files
authored
Change default SQL mapping for VARCHAR from Utf8 to Utf8View (#16142)
* Change default mapping of SQL VARCHAR from Utf8 to Utf8View * Fix test * Fix doc * Add utf8view support for user defined * update testing data * fix * clippy * fix * Fix * Fix fmt * Fix * Fix slt testing * add doc for avro utf8view * Support utf8view for STRING_AGG * Address comments * fmt
1 parent 2c2f225 commit 21248fb

49 files changed

Lines changed: 238 additions & 193 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

datafusion-examples/examples/dataframe.rs

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,9 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use arrow::array::{ArrayRef, Int32Array, RecordBatch, StringArray};
18+
use arrow::array::{ArrayRef, Int32Array, RecordBatch, StringArray, StringViewArray};
1919
use datafusion::arrow::datatypes::{DataType, Field, Schema};
20+
use datafusion::catalog::MemTable;
2021
use datafusion::common::config::CsvOptions;
2122
use datafusion::common::parsers::CompressionTypeVariant;
2223
use datafusion::common::DataFusionError;
@@ -198,10 +199,16 @@ async fn read_memory_macro() -> Result<()> {
198199
/// 3. Write out a DataFrame to a csv file
199200
/// 4. Write out a DataFrame to a json file
200201
async fn write_out(ctx: &SessionContext) -> std::result::Result<(), DataFusionError> {
201-
let mut df = ctx.sql("values ('a'), ('b'), ('c')").await.unwrap();
202-
203-
// Ensure the column names and types match the target table
204-
df = df.with_column_renamed("column1", "tablecol1").unwrap();
202+
let array = StringViewArray::from(vec!["a", "b", "c"]);
203+
let schema = Arc::new(Schema::new(vec![Field::new(
204+
"tablecol1",
205+
DataType::Utf8View,
206+
false,
207+
)]));
208+
let batch = RecordBatch::try_new(schema.clone(), vec![Arc::new(array)])?;
209+
let mem_table = MemTable::try_new(schema.clone(), vec![vec![batch]])?;
210+
ctx.register_table("initial_data", Arc::new(mem_table))?;
211+
let df = ctx.table("initial_data").await?;
205212

206213
ctx.sql(
207214
"create external table

datafusion/common/src/config.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,7 @@ config_namespace! {
262262
/// If true, `VARCHAR` is mapped to `Utf8View` during SQL planning.
263263
/// If false, `VARCHAR` is mapped to `Utf8` during SQL planning.
264264
/// Default is false.
265-
pub map_varchar_to_utf8view: bool, default = false
265+
pub map_varchar_to_utf8view: bool, default = true
266266

267267
/// When set to true, the source locations relative to the original SQL
268268
/// query (i.e. [`Span`](https://docs.rs/sqlparser/latest/sqlparser/tokenizer/struct.Span.html)) will be collected

datafusion/core/tests/dataframe/mod.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2505,6 +2505,11 @@ async fn write_table_with_order() -> Result<()> {
25052505
write_df = write_df
25062506
.with_column_renamed("column1", "tablecol1")
25072507
.unwrap();
2508+
2509+
// Ensure the column type matches the target table
2510+
write_df =
2511+
write_df.with_column("tablecol1", cast(col("tablecol1"), DataType::Utf8View))?;
2512+
25082513
let sql_str =
25092514
"create external table data(tablecol1 varchar) stored as parquet location '"
25102515
.to_owned()

datafusion/core/tests/sql/explain_analyze.rs

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -176,9 +176,9 @@ async fn csv_explain_plans() {
176176
// Verify schema
177177
let expected = vec![
178178
"Explain [plan_type:Utf8, plan:Utf8]",
179-
" Projection: aggregate_test_100.c1 [c1:Utf8]",
180-
" Filter: aggregate_test_100.c2 > Int64(10) [c1:Utf8, c2:Int8, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:UInt32, c10:UInt64, c11:Float32, c12:Float64, c13:Utf8]",
181-
" TableScan: aggregate_test_100 [c1:Utf8, c2:Int8, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:UInt32, c10:UInt64, c11:Float32, c12:Float64, c13:Utf8]",
179+
" Projection: aggregate_test_100.c1 [c1:Utf8View]",
180+
" Filter: aggregate_test_100.c2 > Int64(10) [c1:Utf8View, c2:Int8, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:UInt32, c10:UInt64, c11:Float32, c12:Float64, c13:Utf8View]",
181+
" TableScan: aggregate_test_100 [c1:Utf8View, c2:Int8, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:UInt32, c10:UInt64, c11:Float32, c12:Float64, c13:Utf8View]",
182182
];
183183
let formatted = plan.display_indent_schema().to_string();
184184
let actual: Vec<&str> = formatted.trim().lines().collect();
@@ -222,11 +222,11 @@ async fn csv_explain_plans() {
222222
" {",
223223
" graph[label=\"Detailed LogicalPlan\"]",
224224
" 7[shape=box label=\"Explain\\nSchema: [plan_type:Utf8, plan:Utf8]\"]",
225-
" 8[shape=box label=\"Projection: aggregate_test_100.c1\\nSchema: [c1:Utf8]\"]",
225+
" 8[shape=box label=\"Projection: aggregate_test_100.c1\\nSchema: [c1:Utf8View]\"]",
226226
" 7 -> 8 [arrowhead=none, arrowtail=normal, dir=back]",
227-
" 9[shape=box label=\"Filter: aggregate_test_100.c2 > Int64(10)\\nSchema: [c1:Utf8, c2:Int8, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:UInt32, c10:UInt64, c11:Float32, c12:Float64, c13:Utf8]\"]",
227+
" 9[shape=box label=\"Filter: aggregate_test_100.c2 > Int64(10)\\nSchema: [c1:Utf8View, c2:Int8, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:UInt32, c10:UInt64, c11:Float32, c12:Float64, c13:Utf8View]\"]",
228228
" 8 -> 9 [arrowhead=none, arrowtail=normal, dir=back]",
229-
" 10[shape=box label=\"TableScan: aggregate_test_100\\nSchema: [c1:Utf8, c2:Int8, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:UInt32, c10:UInt64, c11:Float32, c12:Float64, c13:Utf8]\"]",
229+
" 10[shape=box label=\"TableScan: aggregate_test_100\\nSchema: [c1:Utf8View, c2:Int8, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:UInt32, c10:UInt64, c11:Float32, c12:Float64, c13:Utf8View]\"]",
230230
" 9 -> 10 [arrowhead=none, arrowtail=normal, dir=back]",
231231
" }",
232232
"}",
@@ -250,9 +250,9 @@ async fn csv_explain_plans() {
250250
// Verify schema
251251
let expected = vec![
252252
"Explain [plan_type:Utf8, plan:Utf8]",
253-
" Projection: aggregate_test_100.c1 [c1:Utf8]",
254-
" Filter: aggregate_test_100.c2 > Int8(10) [c1:Utf8, c2:Int8]",
255-
" TableScan: aggregate_test_100 projection=[c1, c2], partial_filters=[aggregate_test_100.c2 > Int8(10)] [c1:Utf8, c2:Int8]",
253+
" Projection: aggregate_test_100.c1 [c1:Utf8View]",
254+
" Filter: aggregate_test_100.c2 > Int8(10) [c1:Utf8View, c2:Int8]",
255+
" TableScan: aggregate_test_100 projection=[c1, c2], partial_filters=[aggregate_test_100.c2 > Int8(10)] [c1:Utf8View, c2:Int8]",
256256
];
257257
let formatted = plan.display_indent_schema().to_string();
258258
let actual: Vec<&str> = formatted.trim().lines().collect();
@@ -296,11 +296,11 @@ async fn csv_explain_plans() {
296296
" {",
297297
" graph[label=\"Detailed LogicalPlan\"]",
298298
" 7[shape=box label=\"Explain\\nSchema: [plan_type:Utf8, plan:Utf8]\"]",
299-
" 8[shape=box label=\"Projection: aggregate_test_100.c1\\nSchema: [c1:Utf8]\"]",
299+
" 8[shape=box label=\"Projection: aggregate_test_100.c1\\nSchema: [c1:Utf8View]\"]",
300300
" 7 -> 8 [arrowhead=none, arrowtail=normal, dir=back]",
301-
" 9[shape=box label=\"Filter: aggregate_test_100.c2 > Int8(10)\\nSchema: [c1:Utf8, c2:Int8]\"]",
301+
" 9[shape=box label=\"Filter: aggregate_test_100.c2 > Int8(10)\\nSchema: [c1:Utf8View, c2:Int8]\"]",
302302
" 8 -> 9 [arrowhead=none, arrowtail=normal, dir=back]",
303-
" 10[shape=box label=\"TableScan: aggregate_test_100 projection=[c1, c2], partial_filters=[aggregate_test_100.c2 > Int8(10)]\\nSchema: [c1:Utf8, c2:Int8]\"]",
303+
" 10[shape=box label=\"TableScan: aggregate_test_100 projection=[c1, c2], partial_filters=[aggregate_test_100.c2 > Int8(10)]\\nSchema: [c1:Utf8View, c2:Int8]\"]",
304304
" 9 -> 10 [arrowhead=none, arrowtail=normal, dir=back]",
305305
" }",
306306
"}",
@@ -398,9 +398,9 @@ async fn csv_explain_verbose_plans() {
398398
// Verify schema
399399
let expected = vec![
400400
"Explain [plan_type:Utf8, plan:Utf8]",
401-
" Projection: aggregate_test_100.c1 [c1:Utf8]",
402-
" Filter: aggregate_test_100.c2 > Int64(10) [c1:Utf8, c2:Int8, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:UInt32, c10:UInt64, c11:Float32, c12:Float64, c13:Utf8]",
403-
" TableScan: aggregate_test_100 [c1:Utf8, c2:Int8, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:UInt32, c10:UInt64, c11:Float32, c12:Float64, c13:Utf8]",
401+
" Projection: aggregate_test_100.c1 [c1:Utf8View]",
402+
" Filter: aggregate_test_100.c2 > Int64(10) [c1:Utf8View, c2:Int8, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:UInt32, c10:UInt64, c11:Float32, c12:Float64, c13:Utf8View]",
403+
" TableScan: aggregate_test_100 [c1:Utf8View, c2:Int8, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:UInt32, c10:UInt64, c11:Float32, c12:Float64, c13:Utf8View]",
404404
];
405405
let formatted = dataframe.logical_plan().display_indent_schema().to_string();
406406
let actual: Vec<&str> = formatted.trim().lines().collect();
@@ -444,11 +444,11 @@ async fn csv_explain_verbose_plans() {
444444
" {",
445445
" graph[label=\"Detailed LogicalPlan\"]",
446446
" 7[shape=box label=\"Explain\\nSchema: [plan_type:Utf8, plan:Utf8]\"]",
447-
" 8[shape=box label=\"Projection: aggregate_test_100.c1\\nSchema: [c1:Utf8]\"]",
447+
" 8[shape=box label=\"Projection: aggregate_test_100.c1\\nSchema: [c1:Utf8View]\"]",
448448
" 7 -> 8 [arrowhead=none, arrowtail=normal, dir=back]",
449-
" 9[shape=box label=\"Filter: aggregate_test_100.c2 > Int64(10)\\nSchema: [c1:Utf8, c2:Int8, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:UInt32, c10:UInt64, c11:Float32, c12:Float64, c13:Utf8]\"]",
449+
" 9[shape=box label=\"Filter: aggregate_test_100.c2 > Int64(10)\\nSchema: [c1:Utf8View, c2:Int8, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:UInt32, c10:UInt64, c11:Float32, c12:Float64, c13:Utf8View]\"]",
450450
" 8 -> 9 [arrowhead=none, arrowtail=normal, dir=back]",
451-
" 10[shape=box label=\"TableScan: aggregate_test_100\\nSchema: [c1:Utf8, c2:Int8, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:UInt32, c10:UInt64, c11:Float32, c12:Float64, c13:Utf8]\"]",
451+
" 10[shape=box label=\"TableScan: aggregate_test_100\\nSchema: [c1:Utf8View, c2:Int8, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:UInt32, c10:UInt64, c11:Float32, c12:Float64, c13:Utf8View]\"]",
452452
" 9 -> 10 [arrowhead=none, arrowtail=normal, dir=back]",
453453
" }",
454454
"}",
@@ -472,9 +472,9 @@ async fn csv_explain_verbose_plans() {
472472
// Verify schema
473473
let expected = vec![
474474
"Explain [plan_type:Utf8, plan:Utf8]",
475-
" Projection: aggregate_test_100.c1 [c1:Utf8]",
476-
" Filter: aggregate_test_100.c2 > Int8(10) [c1:Utf8, c2:Int8]",
477-
" TableScan: aggregate_test_100 projection=[c1, c2], partial_filters=[aggregate_test_100.c2 > Int8(10)] [c1:Utf8, c2:Int8]",
475+
" Projection: aggregate_test_100.c1 [c1:Utf8View]",
476+
" Filter: aggregate_test_100.c2 > Int8(10) [c1:Utf8View, c2:Int8]",
477+
" TableScan: aggregate_test_100 projection=[c1, c2], partial_filters=[aggregate_test_100.c2 > Int8(10)] [c1:Utf8View, c2:Int8]",
478478
];
479479
let formatted = plan.display_indent_schema().to_string();
480480
let actual: Vec<&str> = formatted.trim().lines().collect();
@@ -518,11 +518,11 @@ async fn csv_explain_verbose_plans() {
518518
" {",
519519
" graph[label=\"Detailed LogicalPlan\"]",
520520
" 7[shape=box label=\"Explain\\nSchema: [plan_type:Utf8, plan:Utf8]\"]",
521-
" 8[shape=box label=\"Projection: aggregate_test_100.c1\\nSchema: [c1:Utf8]\"]",
521+
" 8[shape=box label=\"Projection: aggregate_test_100.c1\\nSchema: [c1:Utf8View]\"]",
522522
" 7 -> 8 [arrowhead=none, arrowtail=normal, dir=back]",
523-
" 9[shape=box label=\"Filter: aggregate_test_100.c2 > Int8(10)\\nSchema: [c1:Utf8, c2:Int8]\"]",
523+
" 9[shape=box label=\"Filter: aggregate_test_100.c2 > Int8(10)\\nSchema: [c1:Utf8View, c2:Int8]\"]",
524524
" 8 -> 9 [arrowhead=none, arrowtail=normal, dir=back]",
525-
" 10[shape=box label=\"TableScan: aggregate_test_100 projection=[c1, c2], partial_filters=[aggregate_test_100.c2 > Int8(10)]\\nSchema: [c1:Utf8, c2:Int8]\"]",
525+
" 10[shape=box label=\"TableScan: aggregate_test_100 projection=[c1, c2], partial_filters=[aggregate_test_100.c2 > Int8(10)]\\nSchema: [c1:Utf8View, c2:Int8]\"]",
526526
" 9 -> 10 [arrowhead=none, arrowtail=normal, dir=back]",
527527
" }",
528528
"}",

datafusion/core/tests/user_defined/user_defined_plan.rs

Lines changed: 26 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -63,15 +63,14 @@ use std::hash::Hash;
6363
use std::task::{Context, Poll};
6464
use std::{any::Any, collections::BTreeMap, fmt, sync::Arc};
6565

66+
use arrow::array::{Array, ArrayRef, StringViewArray};
6667
use arrow::{
67-
array::{Int64Array, StringArray},
68-
datatypes::SchemaRef,
69-
record_batch::RecordBatch,
68+
array::Int64Array, datatypes::SchemaRef, record_batch::RecordBatch,
7069
util::pretty::pretty_format_batches,
7170
};
7271
use datafusion::execution::session_state::SessionStateBuilder;
7372
use datafusion::{
74-
common::cast::{as_int64_array, as_string_array},
73+
common::cast::as_int64_array,
7574
common::{arrow_datafusion_err, internal_err, DFSchemaRef},
7675
error::{DataFusionError, Result},
7776
execution::{
@@ -100,6 +99,7 @@ use datafusion_optimizer::AnalyzerRule;
10099
use datafusion_physical_plan::execution_plan::{Boundedness, EmissionType};
101100

102101
use async_trait::async_trait;
102+
use datafusion_common::cast::as_string_view_array;
103103
use futures::{Stream, StreamExt};
104104

105105
/// Execute the specified sql and return the resulting record batches
@@ -796,22 +796,26 @@ fn accumulate_batch(
796796
k: &usize,
797797
) -> BTreeMap<i64, String> {
798798
let num_rows = input_batch.num_rows();
799+
799800
// Assuming the input columns are
800-
// column[0]: customer_id / UTF8
801+
// column[0]: customer_id UTF8View
801802
// column[1]: revenue: Int64
802-
let customer_id =
803-
as_string_array(input_batch.column(0)).expect("Column 0 is not customer_id");
804803

804+
let customer_id_column = input_batch.column(0);
805805
let revenue = as_int64_array(input_batch.column(1)).unwrap();
806806

807807
for row in 0..num_rows {
808-
add_row(
809-
&mut top_values,
810-
customer_id.value(row),
811-
revenue.value(row),
812-
k,
813-
);
808+
let customer_id = match customer_id_column.data_type() {
809+
arrow::datatypes::DataType::Utf8View => {
810+
let array = as_string_view_array(customer_id_column).unwrap();
811+
array.value(row)
812+
}
813+
_ => panic!("Unsupported customer_id type"),
814+
};
815+
816+
add_row(&mut top_values, customer_id, revenue.value(row), k);
814817
}
818+
815819
top_values
816820
}
817821

@@ -843,11 +847,19 @@ impl Stream for TopKReader {
843847
self.state.iter().rev().unzip();
844848

845849
let customer: Vec<&str> = customer.iter().map(|&s| &**s).collect();
850+
851+
let customer_array: ArrayRef = match schema.field(0).data_type() {
852+
arrow::datatypes::DataType::Utf8View => {
853+
Arc::new(StringViewArray::from(customer))
854+
}
855+
other => panic!("Unsupported customer_id output type: {other:?}"),
856+
};
857+
846858
Poll::Ready(Some(
847859
RecordBatch::try_new(
848860
schema,
849861
vec![
850-
Arc::new(StringArray::from(customer)),
862+
Arc::new(customer_array),
851863
Arc::new(Int64Array::from(revenue)),
852864
],
853865
)

datafusion/functions-aggregate/src/string_agg.rs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
use crate::array_agg::ArrayAgg;
2121
use arrow::array::ArrayRef;
2222
use arrow::datatypes::{DataType, Field, FieldRef};
23-
use datafusion_common::cast::as_generic_string_array;
23+
use datafusion_common::cast::{as_generic_string_array, as_string_view_array};
2424
use datafusion_common::Result;
2525
use datafusion_common::{internal_err, not_impl_err, ScalarValue};
2626
use datafusion_expr::function::AccumulatorArgs;
@@ -95,9 +95,15 @@ impl StringAgg {
9595
TypeSignature::Exact(vec![DataType::LargeUtf8, DataType::Utf8]),
9696
TypeSignature::Exact(vec![DataType::LargeUtf8, DataType::LargeUtf8]),
9797
TypeSignature::Exact(vec![DataType::LargeUtf8, DataType::Null]),
98+
TypeSignature::Exact(vec![DataType::LargeUtf8, DataType::Utf8View]),
9899
TypeSignature::Exact(vec![DataType::Utf8, DataType::Utf8]),
99100
TypeSignature::Exact(vec![DataType::Utf8, DataType::LargeUtf8]),
100101
TypeSignature::Exact(vec![DataType::Utf8, DataType::Null]),
102+
TypeSignature::Exact(vec![DataType::Utf8, DataType::Utf8View]),
103+
TypeSignature::Exact(vec![DataType::Utf8View, DataType::Utf8View]),
104+
TypeSignature::Exact(vec![DataType::Utf8View, DataType::LargeUtf8]),
105+
TypeSignature::Exact(vec![DataType::Utf8View, DataType::Null]),
106+
TypeSignature::Exact(vec![DataType::Utf8View, DataType::Utf8]),
101107
],
102108
Volatility::Immutable,
103109
),
@@ -211,6 +217,10 @@ impl Accumulator for StringAggAccumulator {
211217
.iter()
212218
.flatten()
213219
.collect(),
220+
DataType::Utf8View => as_string_view_array(list.values())?
221+
.iter()
222+
.flatten()
223+
.collect(),
214224
_ => {
215225
return internal_err!(
216226
"Expected elements to of type Utf8 or LargeUtf8, but got {}",

datafusion/sql/src/planner.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ impl ParserOptions {
7272
parse_float_as_decimal: false,
7373
enable_ident_normalization: true,
7474
support_varchar_with_length: true,
75-
map_varchar_to_utf8view: false,
75+
map_varchar_to_utf8view: true,
7676
enable_options_value_normalization: false,
7777
collect_spans: false,
7878
}

datafusion/sql/tests/sql_integration.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3355,7 +3355,7 @@ fn parse_decimals_parser_options() -> ParserOptions {
33553355
parse_float_as_decimal: true,
33563356
enable_ident_normalization: false,
33573357
support_varchar_with_length: false,
3358-
map_varchar_to_utf8view: false,
3358+
map_varchar_to_utf8view: true,
33593359
enable_options_value_normalization: false,
33603360
collect_spans: false,
33613361
}
@@ -3366,7 +3366,7 @@ fn ident_normalization_parser_options_no_ident_normalization() -> ParserOptions
33663366
parse_float_as_decimal: true,
33673367
enable_ident_normalization: false,
33683368
support_varchar_with_length: false,
3369-
map_varchar_to_utf8view: false,
3369+
map_varchar_to_utf8view: true,
33703370
enable_options_value_normalization: false,
33713371
collect_spans: false,
33723372
}
@@ -3377,7 +3377,7 @@ fn ident_normalization_parser_options_ident_normalization() -> ParserOptions {
33773377
parse_float_as_decimal: true,
33783378
enable_ident_normalization: true,
33793379
support_varchar_with_length: false,
3380-
map_varchar_to_utf8view: false,
3380+
map_varchar_to_utf8view: true,
33813381
enable_options_value_normalization: false,
33823382
collect_spans: false,
33833383
}

datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,9 @@ pub fn convert_schema_to_types(columns: &Fields) -> Vec<DFColumnType> {
292292
if key_type.is_integer() {
293293
// mapping dictionary string types to Text
294294
match value_type.as_ref() {
295-
DataType::Utf8 | DataType::LargeUtf8 => DFColumnType::Text,
295+
DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => {
296+
DFColumnType::Text
297+
}
296298
_ => DFColumnType::Another,
297299
}
298300
} else {

0 commit comments

Comments
 (0)