1- """HTML formatting utilities for DataFusion DataFrames.
2-
3- This module provides a customizable HTML formatter for displaying DataFrames
4- in rich environments like Jupyter notebooks.
5-
6- Examples:
7- Basic usage with the default formatter:
8-
9- >>> import datafusion as df
10- >>> # Create a DataFrame
11- >>> ctx = df.SessionContext()
12- >>> df_obj = ctx.sql("SELECT 1 as id, 'example' as name")
13- >>> # The DataFrame will use the default formatter in Jupyter
14-
15- Configuring the global formatter:
16-
17- >>> from datafusion.html_formatter import configure_formatter
18- >>> configure_formatter(
19- ... max_cell_length=50,
20- ... max_height=500,
21- ... enable_cell_expansion=True
22- ... )
23-
24- Creating a custom formatter with specialized type handling:
25-
26- >>> import datetime
27- >>> from datafusion.html_formatter import (
28- ... DataFrameHtmlFormatter,
29- ... StyleProvider,
30- ... get_formatter
31- ... )
32- >>>
33- >>> # Create a custom date formatter
34- >>> def format_date(date_value):
35- ... return date_value.strftime("%Y-%m-%d")
36- >>>
37- >>> # Create a custom style provider
38- >>> class BlueHeaderStyleProvider(StyleProvider):
39- ... def get_cell_style(self) -> str:
40- ... return "border: 1px solid #ddd; padding: 8px; text-align: left;"
41- ...
42- ... def get_header_style(self) -> str:
43- ... return (
44- ... "border: 1px solid #ddd; padding: 8px; "
45- ... "background-color: #4285f4; color: white; "
46- ... "text-align: left; font-weight: bold;"
47- ... )
48- >>>
49- >>> # Use composition to create a custom formatter
50- >>> formatter = DataFrameHtmlFormatter(
51- ... max_cell_length=100,
52- ... style_provider=BlueHeaderStyleProvider()
53- ... )
54- >>>
55- >>> # Register formatters for specific types
56- >>> formatter.register_formatter(datetime.date, format_date)
57- >>> formatter.register_formatter(float, lambda x: f"{x:.2f}")
58- >>>
59- >>> # Make it the global formatter
60- >>> from datafusion.html_formatter import configure_formatter
61- >>> configure_formatter(
62- ... max_cell_length=100,
63- ... style_provider=BlueHeaderStyleProvider()
64- ... )
65- >>> # Now register the formatters with the global formatter
66- >>> current_formatter = get_formatter()
67- >>> current_formatter.register_formatter(datetime.date, format_date)
68- >>> current_formatter.register_formatter(float, lambda x: f"{x:.2f}")
69-
70- Creating custom cell builders for more complex formatting:
71-
72- >>> # Custom cell builder for numeric values
73- >>> def number_cell_builder(value, row, col, table_id):
74- ... if isinstance(value, (int, float)) and value < 0:
75- ... return f"<td style='background-color: #ffcccc'>{value}</td>"
76- ... elif isinstance(value, (int, float)) and value > 1000:
77- ... return f"<td style='background-color: #ccffcc; font-weight: bold'>{value}</td>"
78- ... else:
79- ... return f"<td>{value}</td>"
80- >>>
81- >>> formatter.set_custom_cell_builder(number_cell_builder)
82- """
1+ """HTML formatting utilities for DataFusion DataFrames."""
832
843from typing import Dict , Optional , Any , Union , List , Callable , Type , Protocol
854
@@ -147,46 +66,6 @@ class DataFrameHtmlFormatter:
14766 custom_css: Additional CSS to include in the HTML output
14867 show_truncation_message: Whether to display a message when data is truncated
14968 style_provider: Custom provider for cell and header styles
150-
151- Example:
152- Create a formatter that adds color-coding for numeric values and custom date formatting:
153-
154- >>> # Create custom style provider
155- >>> class CustomStyleProvider:
156- ... def get_cell_style(self) -> str:
157- ... return "border: 1px solid #ddd; padding: 8px;"
158- ...
159- ... def get_header_style(self) -> str:
160- ... return (
161- ... "border: 1px solid #ddd; padding: 8px; "
162- ... "background-color: #333; color: white;"
163- ... )
164- >>>
165- >>> # Create the formatter with custom styling
166- >>> formatter = DataFrameHtmlFormatter(
167- ... max_cell_length=50,
168- ... style_provider=CustomStyleProvider()
169- ... )
170- >>>
171- >>> # Add custom formatters for specific data types
172- >>> import datetime
173- >>> formatter.register_formatter(
174- ... datetime.date,
175- ... lambda d: f'<span style="color: blue">{d.strftime("%b %d, %Y")}</span>'
176- ... )
177- >>>
178- >>> # Format large numbers with commas
179- >>> formatter.register_formatter(
180- ... int,
181- ... lambda n: f'<span style="font-family: monospace">{n:,}</span>' if n > 1000 else str(n)
182- ... )
183- >>>
184- >>> # Replace the global formatter so all DataFrames use it
185- >>> from datafusion.html_formatter import configure_formatter
186- >>> configure_formatter(
187- ... max_cell_length=50,
188- ... style_provider=CustomStyleProvider()
189- ... )
19069 """
19170
19271 def __init__ (
@@ -288,7 +167,9 @@ def _build_html_header(self) -> List[str]:
288167 """Build the HTML header with CSS styles."""
289168 html = []
290169 html .append ("<style>" )
291- html .append (self ._get_default_css ())
170+ # Only include expandable CSS if cell expansion is enabled
171+ if self .enable_cell_expansion :
172+ html .append (self ._get_default_css ())
292173 if self .custom_css :
293174 html .append (self .custom_css )
294175 html .append ("</style>" )
@@ -332,57 +213,109 @@ def _build_table_body(self, batches: list, table_uuid: str) -> List[str]:
332213 html .append ("<tr>" )
333214
334215 for col_idx , column in enumerate (batch .columns ):
335- cell_value = self ._format_cell_value (column , row_idx )
216+ raw_value = self ._get_cell_value (column , row_idx )
217+ formatted_value = self ._format_cell_value (raw_value )
336218
337219 if (
338- len (str (cell_value )) > self .max_cell_length
220+ len (str (formatted_value )) > self .max_cell_length
339221 and self .enable_cell_expansion
340222 ):
341223 html .append (
342224 self ._build_expandable_cell (
343- cell_value , row_count , col_idx , table_uuid
225+ raw_value ,
226+ formatted_value ,
227+ row_count ,
228+ col_idx ,
229+ table_uuid ,
344230 )
345231 )
346232 else :
347- html .append (self ._build_regular_cell (cell_value ))
233+ html .append (
234+ self ._build_regular_cell (raw_value , formatted_value )
235+ )
348236
349237 html .append ("</tr>" )
350238
351239 html .append ("</tbody>" )
352240 return html
353241
242+ def _get_cell_value (self , column : Any , row_idx : int ) -> Any :
243+ """Extract a cell value from a column.
244+
245+ Args:
246+ column: Arrow array
247+ row_idx: Row index
248+
249+ Returns:
250+ The raw cell value
251+ """
252+ try :
253+ return column [row_idx ]
254+ except (IndexError , TypeError ):
255+ return ""
256+
257+ def _format_cell_value (self , value : Any ) -> str :
258+ """Format a cell value for display.
259+
260+ Uses registered type formatters if available.
261+
262+ Args:
263+ value: The cell value to format
264+
265+ Returns:
266+ Formatted cell value as string
267+ """
268+ # Check for custom type formatters
269+ for type_cls , formatter in self ._type_formatters .items ():
270+ if isinstance (value , type_cls ):
271+ return formatter (value )
272+
273+ return str (value )
274+
354275 def _build_expandable_cell (
355- self , cell_value : Any , row_count : int , col_idx : int , table_uuid : str
276+ self ,
277+ raw_value : Any ,
278+ formatted_value : str ,
279+ row_count : int ,
280+ col_idx : int ,
281+ table_uuid : str ,
356282 ) -> str :
357283 """Build an expandable cell for long content."""
358284 # If custom cell builder is provided, use it
359285 if self ._custom_cell_builder :
360- return self ._custom_cell_builder (cell_value , row_count , col_idx , table_uuid )
286+ return self ._custom_cell_builder (raw_value , row_count , col_idx , table_uuid )
361287
362- short_value = str ( cell_value ) [: self .max_cell_length ]
288+ short_value = formatted_value [: self .max_cell_length ]
363289 return (
364290 f"<td style='{ self .style_provider .get_cell_style ()} '>"
365291 f"<div class='expandable-container'>"
366292 f"<span class='expandable' id='{ table_uuid } -min-text-{ row_count } -{ col_idx } '>"
367293 f"{ short_value } </span>"
368294 f"<span class='full-text' id='{ table_uuid } -full-text-{ row_count } -{ col_idx } '>"
369- f"{ cell_value } </span>"
295+ f"{ formatted_value } </span>"
370296 f"<button class='expand-btn' "
371297 f"onclick=\" toggleDataFrameCellText('{ table_uuid } ',{ row_count } ,{ col_idx } )\" >"
372298 f"...</button>"
373299 f"</div>"
374300 f"</td>"
375301 )
376302
377- def _build_regular_cell (self , cell_value : Any ) -> str :
303+ def _build_regular_cell (self , raw_value : Any , formatted_value : str ) -> str :
378304 """Build a regular table cell."""
379- return f"<td style='{ self .style_provider .get_cell_style ()} '>{ cell_value } </td>"
305+ # If custom cell builder is provided, use it with dummy row/col values
306+ if self ._custom_cell_builder :
307+ # Use 0, 0, "" as dummy values since this isn't an expandable cell
308+ return self ._custom_cell_builder (raw_value , 0 , 0 , "" )
309+
310+ return (
311+ f"<td style='{ self .style_provider .get_cell_style ()} '>{ formatted_value } </td>"
312+ )
380313
381314 def _build_html_footer (self , has_more : bool ) -> List [str ]:
382315 """Build the HTML footer with JavaScript and messages."""
383316 html = []
384317
385- # Add JavaScript for interactivity
318+ # Add JavaScript for interactivity only if cell expansion is enabled
386319 if self .enable_cell_expansion :
387320 html .append (self ._get_javascript ())
388321
@@ -392,30 +325,6 @@ def _build_html_footer(self, has_more: bool) -> List[str]:
392325
393326 return html
394327
395- def _format_cell_value (self , column : Any , row_idx : int ) -> str :
396- """Format a cell value for display.
397-
398- Uses registered type formatters if available.
399-
400- Args:
401- column: Arrow array
402- row_idx: Row index
403-
404- Returns:
405- Formatted cell value as string
406- """
407- try :
408- value = column [row_idx ]
409-
410- # Check for custom type formatters
411- for type_cls , formatter in self ._type_formatters .items ():
412- if isinstance (value , type_cls ):
413- return formatter (value )
414-
415- return str (value )
416- except (IndexError , TypeError ):
417- return ""
418-
419328 def _get_default_css (self ) -> str :
420329 """Get default CSS styles for the HTML table."""
421330 return """
@@ -502,26 +411,7 @@ def configure_formatter(**kwargs: Any) -> None:
def set_style_provider(provider: StyleProvider) -> None:
    """Set a custom style provider for the global formatter.

    Only the ``style_provider`` attribute of the module-level formatter
    is reassigned.

    Args:
        provider: A StyleProvider implementation
    """
    global_formatter = _default_formatter
    global_formatter.style_provider = provider
0 commit comments