11"""HTML formatting utilities for DataFusion DataFrames."""
22
3- from typing import Dict , Optional , Any , Union , List
3+ from typing import Dict , Optional , Any , Union , List , Callable , Type
44
55
66class DataFrameHtmlFormatter :
@@ -9,6 +9,12 @@ class DataFrameHtmlFormatter:
99 This class handles the HTML rendering of DataFrames for display in
1010 Jupyter notebooks and other rich display contexts.
1111
12+ This class is designed to be extended by subclassing. Key extension points:
13+ - Override `get_cell_style()` and `get_header_style()` to customize styling
14+ - Override `_format_cell_value()` to customize value formatting
15+ - Use `register_formatter()` to add custom formatters for specific types
16+ - Override any `_build_*` method to customize component generation
17+
1218 Args:
1319 max_cell_length: Maximum characters to display in a cell before truncation
1420 max_width: Maximum width of the HTML table in pixels
@@ -33,6 +39,44 @@ def __init__(
3339 self .enable_cell_expansion = enable_cell_expansion
3440 self .custom_css = custom_css
3541 self .show_truncation_message = show_truncation_message
42+ # Registry for custom type formatters
43+ self ._type_formatters : Dict [Type , Callable [[Any ], str ]] = {}
44+
def register_formatter(
    self, type_class: Type, formatter: Callable[[Any], str]
) -> None:
    """Associate a formatting callable with a data type.

    The pair is stored in this instance's formatter registry. Registering
    the same type again replaces the earlier callable.

    Args:
        type_class: Type whose values should be rendered by ``formatter``.
        formatter: Callable that receives a value and returns the string
            to display for it.
    """
    registry = self._type_formatters
    registry[type_class] = formatter
56+
def get_cell_style(self) -> str:
    """Return the inline CSS applied to ordinary (non-header) table cells.

    Subclasses may override this hook to restyle data cells.

    Returns:
        A string of ``;``-terminated CSS declarations.
    """
    declarations = (
        "border: 1px solid black",
        "padding: 8px",
        "text-align: left",
        "white-space: nowrap",
    )
    # Every declaration, including the last, ends with a semicolon.
    return "; ".join(declarations) + ";"
66+
def get_header_style(self) -> str:
    """Return the inline CSS applied to table header cells.

    Subclasses may override this hook to restyle the header row.

    Returns:
        A string of ``;``-terminated CSS declarations.
    """
    declarations = (
        "border: 1px solid black",
        "padding: 8px",
        "text-align: left",
        "background-color: #f2f2f2",
        "white-space: nowrap",
        "min-width: fit-content",
        "max-width: fit-content",
    )
    # Every declaration, including the last, ends with a semicolon.
    return "; ".join(declarations) + ";"
3680
3781 def format_html (
3882 self ,
@@ -104,12 +148,7 @@ def _build_table_header(self, schema: Any) -> List[str]:
104148 html .append ("<thead>" )
105149 html .append ("<tr>" )
106150 for field in schema :
107- html .append (
108- "<th style='border: 1px solid black; padding: 8px; "
109- "text-align: left; background-color: #f2f2f2; "
110- "white-space: nowrap; min-width: fit-content; "
111- f"max-width: fit-content;'>{ field .name } </th>"
112- )
151+ html .append (f"<th style='{ self .get_header_style ()} '>{ field .name } </th>" )
113152 html .append ("</tr>" )
114153 html .append ("</thead>" )
115154 return html
@@ -151,8 +190,7 @@ def _build_expandable_cell(
151190 """Build an expandable cell for long content."""
152191 short_value = str (cell_value )[: self .max_cell_length ]
153192 return (
154- f"<td style='border: 1px solid black; padding: 8px; "
155- f"text-align: left; white-space: nowrap;'>"
193+ f"<td style='{ self .get_cell_style ()} '>"
156194 f"<div class='expandable-container'>"
157195 f"<span class='expandable' id='{ table_uuid } -min-text-{ row_count } -{ col_idx } '>"
158196 f"{ short_value } </span>"
@@ -167,10 +205,7 @@ def _build_expandable_cell(
167205
168206 def _build_regular_cell (self , cell_value : Any ) -> str :
169207 """Build a regular table cell."""
170- return (
171- f"<td style='border: 1px solid black; padding: 8px; "
172- f"text-align: left; white-space: nowrap;'>{ cell_value } </td>"
173- )
208+ return f"<td style='{ self .get_cell_style ()} '>{ cell_value } </td>"
174209
175210 def _build_html_footer (self , has_more : bool ) -> List [str ]:
176211 """Build the HTML footer with JavaScript and messages."""
@@ -189,17 +224,25 @@ def _build_html_footer(self, has_more: bool) -> List[str]:
189224 def _format_cell_value (self , column : Any , row_idx : int ) -> str :
190225 """Format a cell value for display.
191226
227+ This method can be overridden by subclasses to customize cell formatting.
228+ It also checks for registered type formatters before falling back to str().
229+
192230 Args:
193231 column: Arrow array
194232 row_idx: Row index
195233
196234 Returns:
197235 Formatted cell value as string
198236 """
199- # This is a simplified implementation for Python-side formatting
200- # In practice, we'd want to handle different Arrow types appropriately
201237 try :
202- return str (column [row_idx ])
238+ value = column [row_idx ]
239+
240+ # Check for custom type formatters
241+ for type_cls , formatter in self ._type_formatters .items ():
242+ if isinstance (value , type_cls ):
243+ return formatter (value )
244+
245+ return str (value )
203246 except (IndexError , TypeError ):
204247 return ""
205248
0 commit comments