Skip to content

Commit cc9f2ea

Browse files
committed
feat: add possibility to not simplify UDTF args
1 parent a41b9fd commit cc9f2ea

5 files changed

Lines changed: 20 additions & 12 deletions

File tree

datafusion/common/src/config.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,9 @@ config_namespace! {
257257
/// parallel file scanning. Setting this to `true` ensures that newlines in values are
258258
/// parsed successfully, which may reduce performance.
259259
pub newlines_in_values: bool, default = false
260+
261+
/// Specifies whether UDTF argument expressions are coerced and simplified before being passed to the table function.
262+
pub simplify_udtf_args: bool, default = true
260263
}
261264
}
262265

datafusion/core/src/execution/session_state.rs

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1844,14 +1844,17 @@ impl ContextProvider for SessionContextProvider<'_> {
18441844
);
18451845
let simplifier = ExprSimplifier::new(simplify_context);
18461846
let schema = DFSchema::empty();
1847-
let args = args
1848-
.into_iter()
1849-
.map(|arg| {
1850-
simplifier
1851-
.coerce(arg, &schema)
1852-
.and_then(|e| simplifier.simplify(e))
1853-
})
1854-
.collect::<datafusion_common::Result<Vec<_>>>()?;
1847+
let args = if !self.state.config.options().catalog.simplify_udtf_args {
1848+
args
1849+
} else {
1850+
args.into_iter()
1851+
.map(|arg| {
1852+
simplifier
1853+
.coerce(arg, &schema)
1854+
.and_then(|e| simplifier.simplify(e))
1855+
})
1856+
.collect::<datafusion_common::Result<Vec<_>>>()?
1857+
};
18551858
let provider = tbl_func.create_table_provider_with_args(TableFunctionArgs {
18561859
args: &args,
18571860
session: self.state,

datafusion/sqllogictest/test_files/information_schema.slt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,7 @@ datafusion.catalog.has_header true
214214
datafusion.catalog.information_schema true
215215
datafusion.catalog.location NULL
216216
datafusion.catalog.newlines_in_values false
217+
datafusion.catalog.simplify_udtf_args true
217218
datafusion.execution.batch_size 8192
218219
datafusion.execution.coalesce_batches true
219220
datafusion.execution.collect_statistics true
@@ -354,6 +355,7 @@ datafusion.catalog.has_header true Default value for `format.has_header` for `CR
354355
datafusion.catalog.information_schema true Should DataFusion provide access to `information_schema` virtual tables for displaying schema information
355356
datafusion.catalog.location NULL Location scanned to load tables for `default` schema
356357
datafusion.catalog.newlines_in_values false Specifies whether newlines in (quoted) CSV values are supported. This is the default value for `format.newlines_in_values` for `CREATE EXTERNAL TABLE` if not specified explicitly in the statement. Parsing newlines in quoted values may be affected by execution behaviour such as parallel file scanning. Setting this to `true` ensures that newlines in values are parsed successfully, which may reduce performance.
358+
datafusion.catalog.simplify_udtf_args true Specifies whether UDTF argument expressions are coerced and simplified before being passed to the table function.
357359
datafusion.execution.batch_size 8192 Default batch size while creating new batches, it's especially useful for buffer-in-memory batches since creating tiny batches would result in too much metadata memory consumption
358360
datafusion.execution.coalesce_batches true When set to true, record batches will be examined between each operator and small batches will be coalesced into larger batches. This is helpful when there are highly selective filters or joins that could produce tiny output batches. The target batch size is determined by the configuration setting
359361
datafusion.execution.collect_statistics true Should DataFusion collect statistics when first creating a table. Has no effect after the table is created. Applies to the default `ListingTableProvider` in DataFusion. Defaults to true.

datafusion/sqllogictest/test_files/placeholders.slt

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,9 @@ logical_plan
3737
02)--Projection: column1 AS id, column2 AS name
3838
03)----Values: ($1, Utf8View("Samanta") AS Utf8("Samanta")), (Int32(5) AS Int64(5), $2)
3939
physical_plan
40-
01)TransformPlanExec: rules=[ResolvePlaceholders: plans_to_modify=1]
41-
02)--DataSinkExec: sink=MemoryTable (partitions=1)
42-
03)----ProjectionExec: expr=[column1@0 as id, column2@1 as name]
43-
04)------DataSourceExec: placeholders=2
40+
01)DataSinkExec: sink=MemoryTable (partitions=1)
41+
02)--ProjectionExec: expr=[column1@0 as id, column2@1 as name]
42+
03)----DataSourceExec: placeholders=2
4443

4544
# Filter with multiple placeholders
4645
query TT

docs/source/user-guide/configs.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ The following configuration settings are available:
7373
| datafusion.catalog.format | NULL | Type of `TableProvider` to use when loading `default` schema |
7474
| datafusion.catalog.has_header | true | Default value for `format.has_header` for `CREATE EXTERNAL TABLE` if not specified explicitly in the statement. |
7575
| datafusion.catalog.newlines_in_values | false | Specifies whether newlines in (quoted) CSV values are supported. This is the default value for `format.newlines_in_values` for `CREATE EXTERNAL TABLE` if not specified explicitly in the statement. Parsing newlines in quoted values may be affected by execution behaviour such as parallel file scanning. Setting this to `true` ensures that newlines in values are parsed successfully, which may reduce performance. |
76+
| datafusion.catalog.simplify_udtf_args | true | Specifies whether UDTF argument expressions are coerced and simplified before being passed to the table function. |
7677
| datafusion.execution.batch_size | 8192 | Default batch size while creating new batches, it's especially useful for buffer-in-memory batches since creating tiny batches would result in too much metadata memory consumption |
7778
| datafusion.execution.perfect_hash_join_small_build_threshold | 1024 | A perfect hash join (see `HashJoinExec` for more details) will be considered if the range of keys (max - min) on the build side is < this threshold. This provides a fast path for joins with very small key ranges, bypassing the density check. Currently only supports cases where build_side.num_rows() < u32::MAX. Support for build_side.num_rows() >= u32::MAX will be added in the future. |
7879
| datafusion.execution.perfect_hash_join_min_key_density | 0.15 | The minimum required density of join keys on the build side to consider a perfect hash join (see `HashJoinExec` for more details). Density is calculated as: `(number of rows) / (max_key - min_key + 1)`. A perfect hash join may be used if the actual key density > this value. Currently only supports cases where build_side.num_rows() < u32::MAX. Support for build_side.num_rows() >= u32::MAX will be added in the future. |

0 commit comments

Comments
 (0)