Skip to content

Commit 0f98b38

Browse files
committed
refactor: enhance HTML representation in DataFrame by integrating latest formatter and improving cell value formatting logic
1 parent 622ed63 commit 0f98b38

2 files changed

Lines changed: 34 additions & 20 deletions

File tree

python/datafusion/dataframe.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,15 @@ def __repr__(self) -> str:
152152
return self.df.__repr__()
153153

154154
def _repr_html_(self) -> str:
155-
return self.df._repr_html_()
155+
"""Return HTML representation for Jupyter notebooks."""
156+
# Import here to avoid circular imports
157+
from datafusion.html_formatter import get_formatter
158+
159+
# Always get the latest formatter
160+
formatter = get_formatter()
161+
162+
# Format the data using the latest formatter
163+
return formatter.format_html(self.collect(), self.schema())
156164

157165
def describe(self) -> DataFrame:
158166
"""Return the statistics for this DataFrame.

python/datafusion/html_formatter.py

Lines changed: 25 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -216,29 +216,28 @@ def _build_table_body(self, batches: list, table_uuid: str) -> List[str]:
216216
# Get the raw value from the column
217217
raw_value = self._get_cell_value(column, row_idx)
218218

219-
# If we have a custom cell builder, use it directly with the raw value
219+
# Always check for type formatters first to format the value
220+
formatted_value = self._format_cell_value(raw_value)
221+
222+
# Then apply either custom cell builder or standard cell formatting
220223
if self._custom_cell_builder:
221-
html.append(
222-
self._custom_cell_builder(
223-
raw_value, row_count, col_idx, table_uuid
224-
)
224+
# Pass only the raw value to the custom builder; formatted_value is not forwarded
225+
cell_html = self._custom_cell_builder(
226+
raw_value, row_count, col_idx, table_uuid
225227
)
228+
html.append(cell_html)
226229
else:
227-
# Format the value using type formatters
228-
formatted_value = self._format_cell_value(raw_value)
229-
230-
# Build the appropriate cell based on length and settings
230+
# Standard cell formatting with formatted value
231231
if (
232232
len(str(raw_value)) > self.max_cell_length
233233
and self.enable_cell_expansion
234234
):
235-
html.append(
236-
self._build_expandable_cell(
237-
formatted_value, row_count, col_idx, table_uuid
238-
)
235+
cell_html = self._build_expandable_cell(
236+
formatted_value, row_count, col_idx, table_uuid
239237
)
240238
else:
241-
html.append(self._build_regular_cell(formatted_value))
239+
cell_html = self._build_regular_cell(formatted_value)
240+
html.append(cell_html)
242241

243242
html.append("</tr>")
244243

@@ -400,11 +399,18 @@ def configure_formatter(**kwargs: Any) -> None:
400399
global _default_formatter
401400
_default_formatter = DataFrameHtmlFormatter(**kwargs)
402401

402+
# Ensure the changes are reflected in existing DataFrames
403+
_refresh_formatter_reference()
403404

404-
def set_style_provider(provider: StyleProvider) -> None:
405-
"""Set a custom style provider for the global formatter.
406405

407-
Args:
408-
provider: A StyleProvider implementation
406+
def _refresh_formatter_reference() -> None:
407+
"""Placeholder hook for refreshing formatter references (currently a no-op).
408+
409+
This helps ensure that changes to the formatter are reflected in existing
410+
DataFrames that might be caching the formatter reference.
409411
"""
410-
_default_formatter.style_provider = provider
412+
try:
413+
# Intentionally a no-op for now: nothing is refreshed or signalled here
414+
pass
415+
except Exception:
416+
pass

0 commit comments

Comments
 (0)