Skip to content

Commit 2ef013f

Browse files
committed
refactor: remove hardcoded constants and use FormatterConfig for display options
1 parent d0209cf commit 2ef013f

1 file changed

Lines changed: 14 additions & 11 deletions

File tree

src/dataframe.rs

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -93,10 +93,6 @@ impl Default for FormatterConfig {
9393
}
9494
}
9595

96-
// Keep constants for backward compatibility
97-
const MAX_TABLE_BYTES_TO_DISPLAY: usize = 2 * 1024 * 1024; // 2 MB
98-
const MIN_TABLE_ROWS_TO_DISPLAY: usize = 20;
99-
10096
fn get_formatter_config(py: Python) -> PyResult<FormatterConfig> {
10197
let formatter_module = py.import("datafusion.html_formatter")?;
10298
let get_formatter = formatter_module.getattr("get_formatter")?;
@@ -166,9 +162,14 @@ impl PyDataFrame {
166162
}
167163

168164
fn __repr__(&self, py: Python) -> PyDataFusionResult<String> {
165+
let config = get_formatter_config(py)?;
169166
let (batches, has_more) = wait_for_future(
170167
py,
171-
collect_record_batches_to_display(self.df.as_ref().clone(), 10, 10),
168+
collect_record_batches_to_display(
169+
self.df.as_ref().clone(),
170+
config.repr_rows,
171+
config.repr_rows,
172+
),
172173
)?;
173174
if batches.is_empty() {
174175
// This should not be reached, but do it for safety since we index into the vector below
@@ -187,11 +188,12 @@ impl PyDataFrame {
187188
}
188189

189190
fn _repr_html_(&self, py: Python) -> PyDataFusionResult<String> {
191+
let config = get_formatter_config(py)?;
190192
let (batches, has_more) = wait_for_future(
191193
py,
192194
collect_record_batches_to_display(
193195
self.df.as_ref().clone(),
194-
MIN_TABLE_ROWS_TO_DISPLAY,
196+
config.min_rows,
195197
usize::MAX,
196198
),
197199
)?;
@@ -851,16 +853,17 @@ async fn collect_record_batches_to_display(
851853
min_rows: usize,
852854
max_rows: usize,
853855
) -> Result<(Vec<RecordBatch>, bool), DataFusionError> {
856+
let config = FormatterConfig::default();
857+
let max_bytes = config.max_bytes;
858+
854859
let partitioned_stream = df.execute_stream_partitioned().await?;
855860
let mut stream = futures::stream::iter(partitioned_stream).flatten();
856861
let mut size_estimate_so_far = 0;
857862
let mut rows_so_far = 0;
858863
let mut record_batches = Vec::default();
859864
let mut has_more = false;
860865

861-
while (size_estimate_so_far < MAX_TABLE_BYTES_TO_DISPLAY && rows_so_far < max_rows)
862-
|| rows_so_far < min_rows
863-
{
866+
while (size_estimate_so_far < max_bytes && rows_so_far < max_rows) || rows_so_far < min_rows {
864867
let mut rb = match stream.next().await {
865868
None => {
866869
break;
@@ -873,8 +876,8 @@ async fn collect_record_batches_to_display(
873876
if rows_in_rb > 0 {
874877
size_estimate_so_far += rb.get_array_memory_size();
875878

876-
if size_estimate_so_far > MAX_TABLE_BYTES_TO_DISPLAY {
877-
let ratio = MAX_TABLE_BYTES_TO_DISPLAY as f32 / size_estimate_so_far as f32;
879+
if size_estimate_so_far > max_bytes {
880+
let ratio = max_bytes as f32 / size_estimate_so_far as f32;
878881
let total_rows = rows_in_rb + rows_so_far;
879882

880883
let mut reduced_row_num = (total_rows as f32 * ratio).round() as usize;

0 commit comments

Comments (0)